To run this code in my project using the renv environment, run the following lines of code
install.packages("renv") #install the package on the new computer (may not be necessary if renv bootstraps itself as expected)
renv::restore() #reinstall all the package versions in the renv lockfile
require("genefilter")
require("DESeq2")
require("apeglm")
require("ashr")
require("ggplot2")
require("vsn")
require("hexbin")
require("pheatmap")
require("RColorBrewer")
require("EnhancedVolcano")
require("rtracklayer")
require("tidyverse")
sessionInfo() #provides list of loaded packages and version of R.
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.0
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices datasets utils methods
## [8] base
##
## other attached packages:
## [1] lubridate_1.9.3 forcats_1.0.0
## [3] stringr_1.5.1 dplyr_1.1.4
## [5] purrr_1.0.2 readr_2.1.5
## [7] tidyr_1.3.1 tibble_3.2.1
## [9] tidyverse_2.0.0 rtracklayer_1.62.0
## [11] EnhancedVolcano_1.18.0 ggrepel_0.9.6
## [13] RColorBrewer_1.1-3 pheatmap_1.0.12
## [15] hexbin_1.28.5 vsn_3.68.0
## [17] ggplot2_3.5.1 ashr_2.2-63
## [19] apeglm_1.22.1 DESeq2_1.40.2
## [21] SummarizedExperiment_1.30.2 Biobase_2.60.0
## [23] MatrixGenerics_1.12.3 matrixStats_1.4.1
## [25] GenomicRanges_1.54.1 GenomeInfoDb_1.36.4
## [27] IRanges_2.34.1 S4Vectors_0.38.2
## [29] BiocGenerics_0.46.0 genefilter_1.82.1
##
## loaded via a namespace (and not attached):
## [1] DBI_1.2.3 bitops_1.0-9 rlang_1.1.4
## [4] magrittr_2.0.3 compiler_4.3.2 RSQLite_2.3.9
## [7] png_0.1-8 vctrs_0.6.5 pkgconfig_2.0.3
## [10] crayon_1.5.3 fastmap_1.2.0 XVector_0.40.0
## [13] utf8_1.2.4 Rsamtools_2.18.0 rmarkdown_2.28
## [16] tzdb_0.4.0 preprocessCore_1.62.1 bit_4.5.0
## [19] xfun_0.48 zlibbioc_1.46.0 cachem_1.1.0
## [22] jsonlite_1.8.9 blob_1.2.4 DelayedArray_0.26.7
## [25] BiocParallel_1.34.2 irlba_2.3.5.1 parallel_4.3.2
## [28] R6_2.5.1 stringi_1.8.4 bslib_0.8.0
## [31] limma_3.56.2 SQUAREM_2021.1 jquerylib_0.1.4
## [34] numDeriv_2016.8-1.1 Rcpp_1.0.13-1 knitr_1.48
## [37] timechange_0.3.0 Matrix_1.6-5 splines_4.3.2
## [40] tidyselect_1.2.1 rstudioapi_0.17.0 abind_1.4-8
## [43] yaml_2.3.10 codetools_0.2-20 affy_1.78.2
## [46] lattice_0.22-6 plyr_1.8.9 withr_3.0.1
## [49] KEGGREST_1.40.1 coda_0.19-4.1 evaluate_1.0.1
## [52] survival_3.7-0 Biostrings_2.70.3 pillar_1.9.0
## [55] affyio_1.70.0 BiocManager_1.30.25 renv_1.0.11
## [58] generics_0.1.3 invgamma_1.1 RCurl_1.98-1.16
## [61] truncnorm_1.0-9 emdbook_1.3.13 hms_1.1.3
## [64] munsell_0.5.1 scales_1.3.0 xtable_1.8-4
## [67] glue_1.8.0 tools_4.3.2 BiocIO_1.12.0
## [70] GenomicAlignments_1.38.2 annotate_1.78.0 locfit_1.5-9.10
## [73] mvtnorm_1.3-2 XML_3.99-0.17 grid_4.3.2
## [76] bbmle_1.0.25.1 bdsmatrix_1.3-7 AnnotationDbi_1.64.1
## [79] colorspace_2.1-1 GenomeInfoDbData_1.2.10 restfulr_0.0.15
## [82] cli_3.6.3 fansi_1.0.6 mixsqp_0.3-54
## [85] S4Arrays_1.0.6 gtable_0.3.5 sass_0.4.9
## [88] digest_0.6.37 SparseArray_1.2.4 rjson_0.2.23
## [91] memoise_2.0.1 htmltools_0.5.8.1 lifecycle_1.0.4
## [94] httr_1.4.7 bit64_4.5.2 MASS_7.3-60.0.1
#set standard output directory for figures
outdir <- "../output_RNA/differential_expression"
save_ggplot <- function(plot, filename, width = 10, height = 7, units = "in", dpi = 300,bg=NULL) {
print(plot)
png_path <- file.path(outdir, paste0(filename, ".png"))
pdf_dir <- file.path(outdir, "pdf_figs")
pdf_path <- file.path(pdf_dir, paste0(filename, ".pdf"))
# Ensure the pdf_figs directory exists
if (!dir.exists(pdf_dir)) dir.create(pdf_dir, recursive = TRUE)
# Save plots
ggsave(filename = png_path, plot = plot, width = width, height = height, units = units, dpi = dpi,bg = bg)
ggsave(filename = pdf_path, plot = plot, width = width, height = height, units = units, dpi = dpi,bg = bg)
}
# Specify colors
ann_colors = list(Tissue = c(OralEpi = "palegreen3" ,Aboral = "mediumpurple1"))
Read in raw count data
counts_raw <- read.csv("../output_RNA/stringtie-GeneExt/LCM_RNA_gene_count_matrix.csv", row.names = 1) #load in data
gene_id,LCM_15,LCM_16,LCM_20,LCM_21,LCM_26,LCM_27,LCM_4,LCM_5,LCM_8,LCM_9
Read in metadata
meta <- read.csv("../data_RNA/LCM_RNA_metadata.csv") %>%
dplyr::arrange(Sample) %>%
mutate(across(c(Tissue, Fragment, Section_Date, LCM_Date), factor)) # Set variables as factors
meta$Tissue <- factor(meta$Tissue, levels = c("OralEpi","Aboral")) #we want OralEpi to be the baseline
Read in gff for gene coordinates
gff <- import("../references/Pocillopora_acuta_HIv2.genes.gff3")
gff_transcripts <- as.data.frame(gff) %>% filter(type == "transcript") %>%
select(c(seqnames,start,end,width,strand,ID)) %>%
dplyr::rename("chromosome"=seqnames, "query"=ID)
Data sanity checks!
stopifnot(all(meta$Sample %in% colnames(counts_raw))) #are all of the sample names in the metadata column names in the gene count matrix?
stopifnot(all(meta$Sample == colnames(counts_raw))) #are they the same in the same order?
ffun<-filterfun(pOverA(0.5,10)) # Keep genes expressed at 10+ counts in at least 50% of samples
counts_filt_poa <- genefilter((counts_raw), ffun) #apply filter
filtered_counts <- counts_raw[counts_filt_poa,] #keep only rows that passed filter
paste0("Number of genes after filtering: ", sum(counts_filt_poa))
## [1] "Number of genes after filtering: 14464"
write.csv(filtered_counts, file = file.path(outdir, "filtered_counts.csv"))
There are now 14464 genes in the filtered dataset.
Data sanity checks:
all(meta$Sample %in% colnames(filtered_counts)) #are all of the sample names in the metadata column names in the gene count matrix?
## [1] TRUE
all(meta$Sample == colnames(filtered_counts)) #are they the same in the same order? Should be TRUE
## [1] TRUE
Create DESeq object and run DESeq2
dds <- DESeqDataSetFromMatrix(countData = filtered_counts,
colData = meta,
design= ~ Fragment + Tissue)
dds <- DESeq(dds)
### Extract results for Aboral vs. OralEpi contrast
res <- results(dds, contrast = c("Tissue","Aboral","OralEpi"))
resLFC <- lfcShrink(dds, coef="Tissue_Aboral_vs_OralEpi", res=res, type = "apeglm")
res <- resLFC
resOrdered <- res[order(res$pvalue),]# save differentially expressed genes
DE_05 <- as.data.frame(resOrdered) %>% filter(padj < 0.05)
DE_05_Up <- DE_05 %>% filter(log2FoldChange > 0) #Higher in Aboral, Lower in OralEpi
DE_05_Down <- DE_05 %>% filter(log2FoldChange < 0) #Lower in Aboral, Higher in OralEpi
nrow(DE_05)
## [1] 3606
nrow(DE_05_Up) #Higher in Aboral, Lower in OralEpi
## [1] 804
nrow(DE_05_Down) #Lower in Aboral, Higher in OralEpi
## [1] 2802
write.csv(as.data.frame(resOrdered),
file = file.path(outdir, "DESeq_results.csv"))
write.csv(DE_05,
file = file.path(outdir, "DEG_05.csv"))
EnhancedVolcano(resLFC,
lab = ifelse(resLFC$padj < 0.05, rownames(resLFC), ""),
x = "log2FoldChange",
y = "pvalue"
)
plotMA(results(dds, contrast = c("Tissue","Aboral","OralEpi")), ylim=c(-20,20))
plotMA(resLFC, ylim=c(-20,20))
resultsNames(dds)
## [1] "Intercept" "Fragment_B_vs_A"
## [3] "Fragment_C_vs_A" "Fragment_D_vs_A"
## [5] "Fragment_E_vs_A" "Tissue_Aboral_vs_OralEpi"
# because we are interested in the comparison and not the intercept, we set 'coef=2'
resNorm <- lfcShrink(dds, coef="Tissue_Aboral_vs_OralEpi", type="normal")
resAsh <- lfcShrink(dds, coef="Tissue_Aboral_vs_OralEpi", type="ashr")
par(mfrow=c(1,3), mar=c(4,4,2,1))
xlim <- c(1,1e5); ylim <- c(-20,20)
plotMA(resLFC, xlim=xlim, ylim=ylim, main="apeglm")
plotMA(resNorm, xlim=xlim, ylim=ylim, main="normal")
plotMA(resAsh, xlim=xlim, ylim=ylim, main="ashr")
plotCounts(dds, gene=which.max(res$log2FoldChange), intgroup="Tissue")
plotCounts(dds, gene=which.min(res$log2FoldChange), intgroup="Tissue")
Transforming count data for visualization
vsd <- vst(dds, blind=FALSE)
rld <- rlog(dds, blind=FALSE)
ntd <- normTransform(dds) # this gives log2(n + 1)
meanSdPlot(assay(vsd), main = "vsd")
meanSdPlot(assay(rld))
meanSdPlot(assay(ntd))
#save the vsd transformation
vsd_mat <- assay(vsd)
write.csv(vsd_mat, file = file.path(outdir, "vsd_expression_matrix.csv"))
Will move forward with vst transformation for visualizations
heatmap_metadata <- as.data.frame(colData(dds)[,c("Tissue","Fragment")])
#view all genes
pheatmap(assay(vsd), cluster_rows=TRUE, show_rownames=FALSE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)),
annotation_colors = ann_colors,color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200))
#view highest count genes
select <- order(rowMeans(counts(dds,normalized=TRUE)),
decreasing=TRUE)[1:20]
pheatmap(assay(vsd)[select,], cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)),
annotation_colors = ann_colors, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200))
#view most significantly differentially expressed genes
select <- order(res$padj)[1:20]
pheatmap(assay(vsd)[select,], cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2, annotation_col=(heatmap_metadata%>% select(Tissue)),
annotation_colors = ann_colors,color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200))
sampleDists <- dist(t(assay(vsd)))
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(vsd$Tissue, vsd$Fragment, sep="-")
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
clustering_distance_rows=sampleDists,
clustering_distance_cols=sampleDists,
col=colors)
pcaData <- plotPCA(vsd, intgroup=c("Tissue", "Fragment"), returnData=TRUE, ntop = 14464)
percentVar <- round(100 * attr(pcaData, "percentVar"))
PCA <- ggplot(pcaData, aes(PC1, PC2, color=Tissue, shape=Fragment)) +
geom_point(size=2) +
scale_color_manual(values = c("Aboral" = "mediumpurple1", "OralEpi" = "palegreen3"))+
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed() + theme_bw()
save_ggplot(PCA, "PCA_allgenes")
PCA_small <- ggplot(pcaData, aes(PC1, PC2, color=Tissue)) +
geom_point(size=2) +
scale_color_manual(values = c("Aboral" = "mediumpurple1", "OralEpi" = "palegreen3"))+
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed() + theme_bw()
ggsave(filename = paste0(outdir,"/PCA_allgenes_small", ".png"), plot = PCA_small, width = 4, height = 2.5, units = "in", dpi = 300)
pcaData <- plotPCA(vsd, intgroup=c("Tissue", "Fragment"), returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
PCA <- ggplot(pcaData, aes(PC1, PC2, color=Tissue, shape=Fragment)) +
geom_point(size=2) +
scale_color_manual(values = c("Aboral" = "mediumpurple1", "OralEpi" = "palegreen3"))+
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed() + theme_bw()
save_ggplot(PCA, "PCA")
PCA_small <- ggplot(pcaData, aes(PC1, PC2, color=Tissue)) +
geom_point(size=2) +
scale_color_manual(values = c("Aboral" = "mediumpurple1", "OralEpi" = "palegreen3"))+
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed() + theme_bw()
ggsave(filename = paste0(outdir,"/PCA_small", ".png"), plot = PCA_small, width = 4, height = 2.5, units = "in", dpi = 300)
Clearly, the majority of the variance in the data is explained by tissue type!
Download annotation files from genome website
# wget files
wget http://cyanophora.rutgers.edu/Pocillopora_acuta/Pocillopora_acuta_HIv2.genes.Conserved_Domain_Search_results.txt.gz
wget http://cyanophora.rutgers.edu/Pocillopora_acuta/Pocillopora_acuta_HIv2.genes.EggNog_results.txt.gz
wget http://cyanophora.rutgers.edu/Pocillopora_acuta/Pocillopora_acuta_HIv2.genes.KEGG_results.txt.gz
# move to references direcotry
mv *gz ../references
# unzip files
gunzip ../references/*gz
EggNog <- read.delim("../references/Pocillopora_acuta_HIv2.genes.EggNog_results.txt") %>% dplyr::rename("query" = X.query)
CDSearch <- read.delim("../references/Pocillopora_acuta_HIv2.genes.Conserved_Domain_Search_results.txt", quote = "") %>% dplyr::rename("query" = X.Query)
KEGG <- read.delim("../references/Pocillopora_acuta_HIv2.genes.KEGG_results.txt", header = FALSE) %>% dplyr::rename("query" = V1, "KeggTerm" = V2)
DE_05$query <- rownames(DE_05)
DE_05_annot <- DE_05 %>% left_join(CDSearch) %>% select(query,everything())
DE_05_eggnog <- DE_05 %>% left_join(EggNog) %>% select(query,everything())
write.csv(as.data.frame(DE_05_eggnog), file=paste0(outdir,"/DE_05_eggnog_annotation.csv"))
annot_all <- as.data.frame(rownames(dds)) %>% dplyr::rename("query" = `rownames(dds)`) %>% left_join(CDSearch)
eggnog_all <- as.data.frame(rownames(dds)) %>% dplyr::rename("query" = `rownames(dds)`) %>% left_join(EggNog)
gene_labels <- eggnog_all %>% select(query,PFAMs) %>%
mutate_all(~ ifelse(is.na(.), "", .)) %>% #replace NAs with "" for labelling purposes
separate(PFAMs, into = c("PFAMs", "rest of name"), sep = ",(?=.*?,)", extra = "merge")
#view most significantly differentially expressed genes
select <- order(res$padj)[1:50]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row =gene_labels[select,"PFAMs"], fontsize_row = 5)
top50_DE
save_ggplot(top50_DE, "top50_DE")
#view genes Higher in Aboral, Lower in OralEpi, ordered by log2FoldChange
select <- order(res$log2FoldChange,decreasing = TRUE)[1:50]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
up_Aboral <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row =gene_labels[select,"PFAMs"], fontsize_row = 5)
up_Aboral
save_ggplot(up_Aboral, "up_Aboral")
#view genes Lower in Aboral, Higher in OralEpi, ordered by log2FoldChange
select <- order(res$log2FoldChange)[1:50]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
up_OralEpi <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row =gene_labels[select,"PFAMs"], fontsize_row = 5)
up_OralEpi
save_ggplot(up_OralEpi, "up_OralEpi")
MarkerGenes <- read.csv("../references/Pacuta_MarkerGenes_Levy2021.csv") %>% dplyr::rename("query" = 1, "List" = 2, "definition" = 3) %>% filter(List !="Toolkit")
MarkerGenes_broc <- read.csv("../output_RNA/marker_genes/Pacuta_Spis_Markers_pairs.csv") %>% select(protein_id_spB,cluster,Standardized_Name_spA ) %>% dplyr::rename("query" = 1, "List" = 2)
MarkerGenes$def_short <- ifelse(nchar(MarkerGenes$definition) > 20,
paste0(substr(MarkerGenes$definition, 1, 17), "..."),
MarkerGenes$definition)
Pcomp_MarkerGenes_broc <- read.csv("../output_RNA/marker_genes/combined_Pcomp_markers.csv") %>% select(protein_id_spA,cluster_guess_std,protein_id_spB) %>% dplyr::rename("Pcomp_gene" = protein_id_spA,"query" = protein_id_spB, "List" = cluster_guess_std)
Pcomp_MarkerGenes_broc <- Pcomp_MarkerGenes_broc %>% filter(grepl("Pocillopora",query)&!grepl("unidentified",List)) %>% distinct()
Pcomp_MarkerGenes_broc_all_res <- as.data.frame(resLFC) %>% mutate(query = rownames(resLFC)) %>% select(query,everything()) %>% left_join(Pcomp_MarkerGenes_broc)
Pcomp_MarkerGenes_broc_all_res_Pcomp <- Pcomp_MarkerGenes_broc_all_res %>%
group_by(Pcomp_gene) %>%
slice_min(order_by = padj, with_ties = FALSE) %>%
ungroup()
Pcomp_MarkerGenes_broc_all_res_Pacuta <- Pcomp_MarkerGenes_broc_all_res %>% group_by(query,baseMean,log2FoldChange,lfcSE,pvalue,padj) %>% summarize(List = paste(List, collapse = ";"), .groups="drop")
write.csv(Pcomp_MarkerGenes_broc_all_res_Pcomp, "../output_RNA/differential_expression/Pcomp_snRNA_marker_expr_PacutaLCM.csv")
Pdam_MarkerGenes_broc <- read.csv("../output_RNA/marker_genes/combined_Pdam_markers.csv") %>% select(protein_id_spA,Standardized_Name_spA,protein_id_spB) %>% dplyr::rename("Pdam_gene" = protein_id_spA,"query" = protein_id_spB, "List" = Standardized_Name_spA)
Pdam_MarkerGenes_broc <- Pdam_MarkerGenes_broc %>% filter(grepl("Pocillopora",query)&!grepl("unidentified",List)) %>% distinct()
Pdam_MarkerGenes_broc_all_res <- as.data.frame(resLFC) %>% mutate(query = rownames(resLFC)) %>%
select(query,everything()) %>% left_join(Pdam_MarkerGenes_broc)
Pdam_MarkerGenes_broc_all_res_Pdam <- Pdam_MarkerGenes_broc_all_res %>%
group_by(Pdam_gene) %>%
slice_min(order_by = padj, with_ties = FALSE) %>%
ungroup()
Pdam_MarkerGenes_broc_all_res_Pacuta <- Pdam_MarkerGenes_broc_all_res %>% group_by(query,baseMean,log2FoldChange,lfcSE,pvalue,padj) %>% summarize(List = paste(List, collapse = ";"), .groups="drop")
write.csv(Pdam_MarkerGenes_broc_all_res_Pdam, "../output_RNA/differential_expression/Pdam_snRNA_marker_expr_PacutaLCM.csv")
Mcap_MarkerGenes_broc <- read.csv("../output_RNA/marker_genes/combined_Mcap_markers.csv") %>% select(protein_id_spA,cluster_guess_std,protein_id_spB) %>% dplyr::rename("Mcap_gene" = protein_id_spA,"query" = protein_id_spB, "List" = cluster_guess_std)
Mcap_MarkerGenes_broc <- Mcap_MarkerGenes_broc %>% filter(grepl("Pocillopora",query)&!grepl("unidentified",List)) %>% distinct()
Mcap_MarkerGenes_broc_all_res <- as.data.frame(resLFC) %>% mutate(query = rownames(resLFC)) %>%
select(query,everything()) %>% left_join(Mcap_MarkerGenes_broc)
Mcap_MarkerGenes_broc_all_res_Mcap <- Mcap_MarkerGenes_broc_all_res %>%
group_by(Mcap_gene) %>%
slice_min(order_by = padj, with_ties = FALSE) %>%
ungroup()
Mcap_MarkerGenes_broc_all_res_Pacuta <- Mcap_MarkerGenes_broc_all_res %>% group_by(query,baseMean,log2FoldChange,lfcSE,pvalue,padj) %>% summarize(List = paste(List, collapse = ";"), .groups="drop")
write.csv(Mcap_MarkerGenes_broc_all_res_Mcap, "../output_RNA/differential_expression/Mcap_snRNA_marker_expr_PacutaLCM.csv")
Biomin <- read.csv("../output_RNA/marker_genes/Pacuta_Biomin_Blast.csv") %>% dplyr::rename("query" = Pocillopora_acuta_best_hit) %>% select(-c(accessionnumber.geneID, Ref))
Biomin_broc <- read.csv("../output_RNA/marker_genes/Pacuta_Biomin_Spis_ortholog.csv") %>% dplyr::rename("query" = Pacuta_gene) %>% select(-c(X,accessionnumber_gene_id, ref))
Biomin <- Biomin %>%
group_by(query,List) %>%
summarize(definition = paste(unique(definition), collapse = ","))
Biomin$def_short <- ifelse(nchar(Biomin$definition) > 40,
paste0(substr(Biomin$definition, 1, 37), "..."),
Biomin$definition)
Biomin_filtered_counts <- filtered_counts[(rownames(filtered_counts) %in% Biomin$query),]
Biomin_broc <- Biomin_broc %>%
group_by(query,List) %>%
summarize(definition = paste(unique(definition), collapse = ","))
Biomin_broc$def_short <- ifelse(nchar(Biomin_broc$definition) > 40,
paste0(substr(Biomin_broc$definition, 1, 37), "..."),
Biomin_broc$definition)
Biomin_broc_filtered_counts <- filtered_counts[(rownames(filtered_counts) %in% Biomin_broc$query),]
write.csv(Biomin_filtered_counts, "../output_RNA/differential_expression/Biomin_filtered_counts.csv")
He_etal_Nvec = He, S., Shao, W., Chen, S. (Cynthia), Wang, T. and Gibson, M. C. (2023). Spatial transcriptomics reveals a cnidarian segment polarity program in Nematostella vectensis. Current Biology 33, 2678-2689.e5.
DuBuc_etal_Nvec = DuBuc, T. Q., Stephenson, T. B., Rock, A. Q. and Martindale, M. Q. (2018). Hox and Wnt pattern the primary body axis of an anthozoan cnidarian before gastrulation. Nat Commun 9, 2007.
He_etal_Nvec <- read.csv("../output_RNA/marker_genes/He_etal_nematostella.csv") %>% dplyr::rename("query" = Pacuta_gene) %>% select(-c(X))
He_etal_Nvec$def_short <- gsub("Homeobox protein", "Hox", He_etal_Nvec$Description, ignore.case = TRUE)
DuBuc_etal_Nvec <- read.csv("../output_RNA/marker_genes/Wnt_nematostella.csv") %>% dplyr::rename("query" = Pacuta_gene) %>% select(-c(X))
DuBuc_etal_Nvec$def_short <- DuBuc_etal_Nvec$Gene_Name
HeatStressGenes <- read.csv("../output_RNA/marker_genes/HeatStressGenes_Pacuta.csv") %>% dplyr::rename("query" = Pacuta_gene) %>% select(-c(X))
DE_05$query <- rownames(DE_05)
resOrdered$query <- rownames(resOrdered)
join_genes_of_interest <- function(df, gene_set) {
df %>%
left_join(gene_set, by = "query") %>%
select(query, everything()) %>%
drop_na() %>% distinct()
}
DE_05_biomin <- join_genes_of_interest(DE_05, Biomin)
DE_05_Biomin_broc <- join_genes_of_interest(DE_05, Biomin_broc)
DE_05_marker <- join_genes_of_interest(DE_05, MarkerGenes)
DE_05_marker_broc <- join_genes_of_interest(DE_05, MarkerGenes_broc)
DE_05_He_etal <- join_genes_of_interest(DE_05, He_etal_Nvec)
DE_05_DuBuc_etal <- join_genes_of_interest(DE_05, DuBuc_etal_Nvec)
DESeq_biomin <- join_genes_of_interest(as.data.frame(resOrdered), Biomin)
DESeq_Biomin_broc <- join_genes_of_interest(as.data.frame(resOrdered), Biomin_broc)
DESeq_marker <- join_genes_of_interest(as.data.frame(resOrdered), MarkerGenes)
DESeq_marker_broc <- join_genes_of_interest(as.data.frame(resOrdered), MarkerGenes_broc)
DESeq_He_etal <- join_genes_of_interest(as.data.frame(resOrdered), He_etal_Nvec)
DESeq_DuBuc_etal <- join_genes_of_interest(as.data.frame(resOrdered), DuBuc_etal_Nvec)
DE_05_HeatStressGenes <- HeatStressGenes %>% left_join(DE_05, by="query") %>% select(query,everything()) %>% drop_na(baseMean)
DESeq_HeatStressGenes <- HeatStressGenes %>% left_join(as.data.frame(resOrdered), by="query") %>% select(query,everything()) %>% drop_na(baseMean)
write.csv(as.data.frame(DE_05_biomin), file=paste0(outdir,"/DE_05_biomin_annotation.csv"))
write.csv(as.data.frame(DE_05_marker), file=paste0(outdir,"/DE_05_markergene_annotation.csv"))
biomin_all_counts <- as.data.frame(counts(dds)) %>% mutate(query = rownames(dds)) %>% select(query,everything()) %>% left_join(Biomin)
biomin_all_res <- as.data.frame(resLFC) %>% mutate(query = rownames(resLFC)) %>% select(query,everything()) %>% left_join(Biomin)
Biomin_broc_all_counts <- as.data.frame(counts(dds)) %>% mutate(query = rownames(dds)) %>% select(query,everything()) %>% left_join(Biomin_broc)
Biomin_broc_all_res <- as.data.frame(resLFC) %>% mutate(query = rownames(resLFC)) %>% select(query,everything()) %>% left_join(Biomin_broc)
markers_all_counts <- as.data.frame(counts(dds)) %>% mutate(query = rownames(dds)) %>% select(query,everything()) %>% left_join(MarkerGenes)
markers_all_res <- as.data.frame(resLFC) %>% mutate(query = rownames(resLFC)) %>% select(query,everything()) %>% left_join(MarkerGenes)
broc_markers_all_counts <- as.data.frame(counts(dds)) %>% mutate(query = rownames(dds)) %>% select(query,everything()) %>% left_join(MarkerGenes_broc)
broc_markers_all_res <- as.data.frame(resLFC) %>% mutate(query = rownames(resLFC)) %>% select(query,everything()) %>% left_join(MarkerGenes_broc)
#view biomin genes that are differentially expressed
z_scores <- t(scale(t(assay(vsd)[DE_05_biomin$query, ]), center = TRUE, scale = TRUE))
DE_biomin <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = DE_05_biomin$def_short, fontsize_row = 5)
DE_biomin
save_ggplot(DE_biomin, "DE_biomin")
#view biomin genes that are differentially expressed
z_scores <- t(scale(t(assay(vsd)[DE_05_Biomin_broc$query, ]), center = TRUE, scale = TRUE))
DE_Biomin_broc <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = DE_05_Biomin_broc$def_short, fontsize_row = 5)
DE_Biomin_broc
save_ggplot(DE_Biomin_broc, "DE_Biomin_broc")
#view marker genes that are differentially expressed
z_scores <- t(scale(t(assay(vsd)[DE_05_marker$query, ]), center = TRUE, scale = TRUE))
DE_marker <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,cutree_rows = 5,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = DE_05_marker$List, fontsize_row = 4)
DE_marker
save_ggplot(DE_marker, "DE_marker")
DE_05_marker_grouped <- DE_05_marker %>% arrange(List) %>% mutate(List = as.factor(List))
z_scores <- t(scale(t(assay(vsd)[DE_05_marker_grouped$quer, ]), center = TRUE, scale = TRUE))
DE_05_marker_grouped_plot <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = DE_05_marker_grouped$List, fontsize_row = 5)
DE_05_marker_grouped_plot
save_ggplot(DE_05_marker_grouped_plot, "DE_05_marker_grouped")
#view marker genes that are differentially expressed
z_scores <- t(scale(t(assay(vsd)[DE_05_marker_broc$query, ]), center = TRUE, scale = TRUE))
DE_marker <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,cutree_rows = 5,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = DE_05_marker_broc$List, fontsize_row = 2)
DE_marker
save_ggplot(DE_marker, "DE_marker_broc")
DE_05_marker_broc_grouped <- DE_05_marker_broc %>% arrange(List) %>% mutate(List = as.factor(List))
z_scores <- t(scale(t(assay(vsd)[DE_05_marker_broc_grouped$quer, ]), center = TRUE, scale = TRUE))
DE_05_marker_broc_grouped_plot <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = DE_05_marker_broc_grouped$List, fontsize_row = 3)
DE_05_marker_broc_grouped_plot
save_ggplot(DE_05_marker_broc_grouped_plot, "DE_05_marker_broc_grouped")
Biomin_volcano <- EnhancedVolcano(biomin_all_res,
lab = biomin_all_res$def_short,
x = 'log2FoldChange',
y = 'padj',
pCutoff = 0.01,
drawConnectors = TRUE,
widthConnectors = 0.75,
pointSize = 1,
labSize = 2,boxedLabels = TRUE,max.overlaps = 40)
save_ggplot(Biomin_volcano, "Biomin_volcano")
Biomin_broc_volcano <- EnhancedVolcano(Biomin_broc_all_res,
lab = Biomin_broc_all_res$def_short,
x = 'log2FoldChange',
y = 'padj',
pCutoff = 0.01,
drawConnectors = TRUE,
widthConnectors = 0.75,
pointSize = 1,
labSize = 2,boxedLabels = TRUE,max.overlaps = 40)
save_ggplot(Biomin_broc_volcano, "Biomin_broc_volcano")
Marker_volcano <- EnhancedVolcano(markers_all_res,
lab = markers_all_res$List,
x = 'log2FoldChange',
y = 'padj',
pCutoff = 0.01,
drawConnectors = TRUE,
widthConnectors = 0.75,
pointSize = 1,
labSize = 2,boxedLabels = TRUE,max.overlaps = 60)
save_ggplot(Marker_volcano, "Marker_volcano")
Marker_volcano_names <- EnhancedVolcano(markers_all_res,
lab = markers_all_res$def_short,
x = 'log2FoldChange',
y = 'padj',
pCutoff = 0.01,
drawConnectors = TRUE,
widthConnectors = 0.75,
pointSize = 1,
labSize = 2,boxedLabels = TRUE,max.overlaps = 60)
save_ggplot(Marker_volcano_names, "Marker_volcano_names")
Marker_volcano <- EnhancedVolcano(broc_markers_all_res,
lab = broc_markers_all_res$List,
x = 'log2FoldChange',
y = 'padj',
pCutoff = 0.01,
drawConnectors = TRUE,
widthConnectors = 0.75,
pointSize = 1,
labSize = 2,boxedLabels = TRUE,max.overlaps = 60)
save_ggplot(Marker_volcano, "Marker_volcano_broc")
EnhancedVolcano(resLFC,
lab = rownames(resLFC),
x = 'log2FoldChange',
y = 'pvalue')
volcano_plain <- EnhancedVolcano(resLFC,
lab = NA,
x = 'log2FoldChange',
y = 'padj',
pCutoff = 0.05,
title="",
subtitle="",
drawConnectors = TRUE,
widthConnectors = 0.75,
pointSize = 1,
labSize = 2,boxedLabels = TRUE,max.overlaps = 60)
save_ggplot(volcano_plain, "volcano_plain",width = 4, height = 6, units = "in", dpi = 300)
# wget protein sequence reference file
wget http://cyanophora.rutgers.edu/Pocillopora_acuta/Pocillopora_acuta_HIv2.genes.pep.faa.gz
# move to references direcotry
mv *gz ../references
# unzip files
gunzip ../references/*gz
#get the names of all the DEGs from the first column of the DEG csv file
tail -n +2 ../output_RNA/differential_expression/DEG_05.csv | cut -d',' -f1 | tr -d '"' > ../output_RNA/differential_expression/DEG_05_names.csv
#grep this file against the protein fasta file, first with wc -l to make sure the number of lines is correct (should be your number of DEGs)
grep -f ../output_RNA/differential_expression/DEG_05_names.csv ../references/Pocillopora_acuta_HIv2.genes.pep.faa | wc -l
#grep each header with the protein sequence after ("-A 1") and save to a new file
grep -A 1 -f ../output_RNA/differential_expression/DEG_05_names.csv ../references/Pocillopora_acuta_HIv2.genes.pep.faa > ../output_RNA/differential_expression/DEG_05_seqs.txt
On andromeda:
Blastp-ing only the DE genes against the entire nr database (will take a while)
cd ../scripts
nano DEG_05_blast.sh
#!/bin/bash
#SBATCH --job-name="DE_blast"
#SBATCH -t 240:00:00
#SBATCH --export=NONE
#SBATCH --mail-type=BEGIN,END,FAIL #email you when job starts, stops and/or fails
#SBATCH --mail-user=zdellaert@uri.edu #your email to send notifications
#SBATCH --mem=500GB
#SBATCH --error=../scripts/outs_errs/"%x_error.%j" #write out slurm error reports
#SBATCH --output=../scripts/outs_errs/"%x_output.%j" #write out any program outpus
#SBATCH --account=putnamlab
#SBATCH --nodes=2 --ntasks-per-node=24
module load BLAST+/2.15.0-gompi-2023a
cd ../output_RNA/differential_expression #set working directory
mkdir blast
cd blast
#nr database location andromeda: /data/shared/ncbi-db/.ncbirc
# points to current location: cat /data/shared/ncbi-db/.ncbirc
# [BLAST]
# BLASTDB=/data/shared/ncbi-db/2024-11-10
blastp -query ../DEG_05_seqs.txt -db nr -out DEG_05_blast_results.txt -outfmt 0 -evalue 1E-05 \
-num_threads 48 \
-max_target_seqs 10
blastp -query ../DEG_05_seqs.txt -db nr -out DEG_05_blast_results_tab.txt -outfmt 6 -evalue 1E-05 \
-num_threads 48 \
-max_target_seqs 1 \
-max_hsps 1
echo "Blast complete!" $(date)
sbatch DEG_05_blast.sh
cd ../output_RNA/differential_expression/blast
wc -l DEG_05_blast_results_tab.txt #3537 of the 3606 genes were annotated
#get just the NCBI database accession numbers for the blast results
cut -f2 DEG_05_blast_results_tab.txt > DEG_05_blast_accessions.txt
#remove any duplicates
sort -u DEG_05_blast_accessions.txt > unique_DEG_05_blast_accessions.txt
wc -l unique_DEG_05_blast_accessions.txt #3404 of the 3537 annotations were unique
while read acc; do
curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=$acc&rettype=gp&retmode=text" \
| grep "DEFINITION" | sed 's/DEFINITION //g' | awk -v id="$acc" '{print id "\t" $0}'
done < unique_DEG_05_blast_accessions.txt > DEG_05_blast_names.txt
wc -l DEG_05_blast_names.txt #3396 ; unsure why 8 are missing.
join -1 2 -2 1 -t $'\t' <(sort -k2 DEG_05_blast_results_tab.txt) <(sort DEG_05_blast_names.txt) > annotated_DEG_05_blast_results_tab.txt
On unity:
swissprot based on https://github.com/urol-e5/deep-dive/blob/main/D-Apul/code/20-Apul-gene-annotation.Rmd and https://github.com/urol-e5/deep-dive/blob/main/F-Pmea/code/20-Pmea-gene-annotation.Rmd
Steven’s notebook post here: https://sr320.github.io/tumbling-oysters/posts/sr320-27-go/
mkdir ../references/blast_dbs
cd ../references/blast_dbs
curl -O https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz
mv uniprot_sprot.fasta.gz uniprot_sprot_r2024_10_02.fasta.gz
gunzip -k uniprot_sprot_r2024_10_02.fasta.gz
rm uniprot_sprot_r2024_10_02.fasta.gz
head uniprot_sprot_r2024_10_02.fasta
echo "Number of Sequences"
grep -c ">" uniprot_sprot_r2024_10_02.fasta
# 572214 sequences
module load blast-plus/2.14.1
makeblastdb \
-in ../references/blast_dbs/uniprot_sprot_r2024_10_02.fasta \
-dbtype prot \
-out ../references/blast_dbs/uniprot_sprot_r2024_10_02
cd ../scripts
nano blastp_SwissProt.sh
#!/bin/bash
#SBATCH -t 18:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=48
#SBATCH --mem=500GB
#SBATCH --export=NONE
#SBATCH --error=../scripts/outs_errs/"%x_error.%j" #write out slurm error reports
#SBATCH --output=../scripts/outs_errs/"%x_output.%j" #write out any program outpus
#SBATCH --mail-type=BEGIN,END,FAIL #email you when job starts, stops and/or fails
#SBATCH -D /project/pi_hputnam_uri_edu/zdellaert/LaserCoral #set working directory
module load blast-plus/2.14.1
cd references/
mkdir annotation
fasta="Pocillopora_acuta_HIv2.genes.pep.faa"
blastp \
-query $fasta \
-db blast_dbs/uniprot_sprot_r2024_10_02 \
-out annotation/blastp_SwissProt_out.tab \
-evalue 1E-05 \
-num_threads 48 \
-max_target_seqs 1 \
-max_hsps 1 \
-outfmt 6
echo "Blast complete!" $(date)
cd references/annotation/
tr '|' '\t' < blastp_SwissProt_out.tab > blastp_SwissProt_out_sep.tab
cd ../references/annotation/
curl -H "Accept: text/plain; format=tsv" "https://rest.uniprot.org/uniprotkb/stream?fields=accession%2Creviewed%2Cid%2Cprotein_name%2Cgene_names%2Corganism_name%2Clength%2Cgo_p%2Cgo%2Cgo_id%2Cgo_c%2Cgo_f&format=tsv&query=%28reviewed%3Atrue%29" -o SwissProt-Annot-GO_111524.tsv
wc -l SwissProt-Annot-GO_111524.tsv
#572215
All code below based on https://github.com/urol-e5/deep-dive/blob/main/D-Apul/code/20-Apul-gene-annotation.Rmd and https://github.com/urol-e5/deep-dive/blob/main/F-Pmea/code/20-Pmea-gene-annotation.Rmd
Steven’s notebook post here: https://sr320.github.io/tumbling-oysters/posts/sr320-27-go/
bltabl <- read.csv("../references/annotation/blastp_SwissProt_out_sep.tab", sep = '\t', header = FALSE)
spgo <- read.csv("../references/annotation/SwissProt-Annot-GO_111524.tsv", sep = '\t', header = TRUE)
annot_tab <- left_join(bltabl, spgo, by = c("V3" = "Entry")) %>%
select(
query = V1,
blast_hit = V3,
evalue = V13,
ProteinNames = Protein.names,
BiologicalProcess = Gene.Ontology..biological.process.,
GeneOntologyIDs = Gene.Ontology.IDs
)
head(annot_tab)
## query blast_hit evalue
## 1 Pocillopora_acuta_HIv2___RNAseq.g24143.t1a Q4JAI4 1.02e-37
## 2 Pocillopora_acuta_HIv2___RNAseq.g18333.t1 O08807 9.62e-116
## 3 Pocillopora_acuta_HIv2___RNAseq.g7985.t1 O74212 3.56e-158
## 4 Pocillopora_acuta_HIv2___TS.g15308.t1 Q09575 1.08e-12
## 5 Pocillopora_acuta_HIv2___RNAseq.g2057.t1 P0C1P0 8.81e-14
## 6 Pocillopora_acuta_HIv2___RNAseq.g4696.t1 Q9W2Q5 8.98e-69
## ProteinNames
## 1 Methionine synthase (EC 2.1.1.-) (Homocysteine methyltransferase)
## 2 Peroxiredoxin-4 (EC 1.11.1.24) (Antioxidant enzyme AOE372) (Peroxiredoxin IV) (Prx-IV) (Thioredoxin peroxidase AO372) (Thioredoxin-dependent peroxide reductase A0372) (Thioredoxin-dependent peroxiredoxin 4)
## 3 Acyl-lipid (8-3)-desaturase (EC 1.14.19.30) (Delta(5) fatty acid desaturase) (Delta-5 fatty acid desaturase)
## 4 Uncharacterized protein K02A2.6
## 5 Phosphatidylinositol N-acetylglucosaminyltransferase subunit Y (Phosphatidylinositol-glycan biosynthesis class Y protein) (PIG-Y)
## 6 Calcium and integrin-binding family member 2
## BiologicalProcess
## 1 methionine biosynthetic process [GO:0009086]; methylation [GO:0032259]
## 2 cell redox homeostasis [GO:0045454]; extracellular matrix organization [GO:0030198]; hydrogen peroxide catabolic process [GO:0042744]; male gonad development [GO:0008584]; negative regulation of male germ cell proliferation [GO:2000255]; protein maturation by protein folding [GO:0022417]; reactive oxygen species metabolic process [GO:0072593]; response to oxidative stress [GO:0006979]; spermatogenesis [GO:0007283]
## 3 long-chain fatty acid biosynthetic process [GO:0042759]; unsaturated fatty acid biosynthetic process [GO:0006636]
## 4 DNA integration [GO:0015074]
## 5 GPI anchor biosynthetic process [GO:0006506]
## 6 calcium ion homeostasis [GO:0055074]; phototransduction [GO:0007602]
## GeneOntologyIDs
## 1 GO:0003871; GO:0008270; GO:0009086; GO:0032259
## 2 GO:0005615; GO:0005737; GO:0005739; GO:0005783; GO:0005790; GO:0005829; GO:0006979; GO:0007283; GO:0008379; GO:0008584; GO:0022417; GO:0030198; GO:0042744; GO:0042802; GO:0045454; GO:0072593; GO:0140313; GO:2000255
## 3 GO:0006636; GO:0016020; GO:0020037; GO:0042759; GO:0046872; GO:0102866
## 4 GO:0003676; GO:0005737; GO:0008270; GO:0015074; GO:0019899; GO:0042575
## 5 GO:0000506; GO:0006506
## 6 GO:0000287; GO:0005509; GO:0005737; GO:0007602; GO:0055074
write.table(annot_tab,
file = "../references/annotation/protein-GO.tsv",
sep = "\t",
row.names = FALSE,
quote = FALSE)
DE_05_SwissProt <- DE_05 %>% left_join(annot_tab) %>% select(query,everything())
write.csv(as.data.frame(DE_05_SwissProt), file=paste0(outdir,"/DE_05_SwissProt_annotation.csv"))
DE_05_SwissProt$short_name <- ifelse(nchar(DE_05_SwissProt$ProteinNames) > 30,
paste0(substr(DE_05_SwissProt$ProteinNames, 1, 27), "..."),
DE_05_SwissProt$ProteinNames)
gene_labels <- DE_05_SwissProt %>%
select(query,short_name) %>%
mutate_all(~ ifelse(is.na(.), "", .)) #replace NAs with "" for labelling purposes
#view most significantly differentially expressed genes
select <- order(res$padj)[1:50]
rownames(res)[select]
## [1] "Pocillopora_acuta_HIv2___RNAseq.g6351.t1"
## [2] "Pocillopora_acuta_HIv2___RNAseq.g28575.t1a"
## [3] "Pocillopora_acuta_HIv2___RNAseq.g27038.t1"
## [4] "Pocillopora_acuta_HIv2___RNAseq.g2165.t1"
## [5] "Pocillopora_acuta_HIv2___TS.g18104.t1"
## [6] "Pocillopora_acuta_HIv2___RNAseq.g14330.t3"
## [7] "Pocillopora_acuta_HIv2___RNAseq.g14025.t1"
## [8] "Pocillopora_acuta_HIv2___RNAseq.g14090.t1"
## [9] "Pocillopora_acuta_HIv2___RNAseq.g10378.t1"
## [10] "Pocillopora_acuta_HIv2___RNAseq.g3832.t1"
## [11] "Pocillopora_acuta_HIv2___RNAseq.g8006.t1"
## [12] "Pocillopora_acuta_HIv2___RNAseq.g14253.t1"
## [13] "Pocillopora_acuta_HIv2___RNAseq.g25431.t1"
## [14] "Pocillopora_acuta_HIv2___RNAseq.11056_t"
## [15] "Pocillopora_acuta_HIv2___RNAseq.g9588.t1"
## [16] "Pocillopora_acuta_HIv2___TS.g30765.t1"
## [17] "Pocillopora_acuta_HIv2___RNAseq.g14021.t1"
## [18] "Pocillopora_acuta_HIv2___RNAseq.g19082.t1"
## [19] "Pocillopora_acuta_HIv2___RNAseq.g16202.t1"
## [20] "Pocillopora_acuta_HIv2___TS.g19991.t2"
## [21] "Pocillopora_acuta_HIv2___RNAseq.g20860.t1"
## [22] "Pocillopora_acuta_HIv2___RNAseq.g8588.t1"
## [23] "Pocillopora_acuta_HIv2___RNAseq.g26604.t1"
## [24] "Pocillopora_acuta_HIv2___TS.g18100.t1"
## [25] "Pocillopora_acuta_HIv2___TS.g27642.t1b"
## [26] "Pocillopora_acuta_HIv2___RNAseq.g11588.t1"
## [27] "Pocillopora_acuta_HIv2___RNAseq.g25327.t1"
## [28] "Pocillopora_acuta_HIv2___RNAseq.g12281.t1"
## [29] "Pocillopora_acuta_HIv2___RNAseq.g19085.t1"
## [30] "Pocillopora_acuta_HIv2___RNAseq.g19284.t1"
## [31] "Pocillopora_acuta_HIv2___TS.g11360.t1"
## [32] "Pocillopora_acuta_HIv2___RNAseq.g7803.t1"
## [33] "Pocillopora_acuta_HIv2___TS.g16008.t2"
## [34] "Pocillopora_acuta_HIv2___RNAseq.g22978.t3b"
## [35] "Pocillopora_acuta_HIv2___RNAseq.g2406.t1"
## [36] "Pocillopora_acuta_HIv2___RNAseq.g8119.t1"
## [37] "Pocillopora_acuta_HIv2___TS.g9414.t1"
## [38] "Pocillopora_acuta_HIv2___RNAseq.g21374.t1"
## [39] "Pocillopora_acuta_HIv2___RNAseq.g21000.t1"
## [40] "Pocillopora_acuta_HIv2___RNAseq.g5252.t1"
## [41] "Pocillopora_acuta_HIv2___RNAseq.g22853.t1"
## [42] "Pocillopora_acuta_HIv2___RNAseq.g17117.t1"
## [43] "Pocillopora_acuta_HIv2___RNAseq.g25759.t1"
## [44] "Pocillopora_acuta_HIv2___TS.g16802.t1"
## [45] "Pocillopora_acuta_HIv2___RNAseq.g24120.t1"
## [46] "Pocillopora_acuta_HIv2___RNAseq.30415_t"
## [47] "Pocillopora_acuta_HIv2___RNAseq.g14336.t1"
## [48] "Pocillopora_acuta_HIv2___RNAseq.g26594.t2"
## [49] "Pocillopora_acuta_HIv2___RNAseq.g19115.t1"
## [50] "Pocillopora_acuta_HIv2___TS.g4983.t1"
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(rownames(res)[select],(gene_labels$query)),2], fontsize_row = 6)
top50_DE
save_ggplot(top50_DE, "top50_DE_SwissProt")
#view most significantly differentially expressed genes by LFC
select <- order(abs(res$log2FoldChange),decreasing = TRUE)[1:50]
rownames(res)[select]
## [1] "Pocillopora_acuta_HIv2___RNAseq.g28575.t1a"
## [2] "Pocillopora_acuta_HIv2___TS.g18104.t1"
## [3] "Pocillopora_acuta_HIv2___RNAseq.g27038.t1"
## [4] "Pocillopora_acuta_HIv2___RNAseq.g14025.t1"
## [5] "Pocillopora_acuta_HIv2___RNAseq.g2165.t1"
## [6] "Pocillopora_acuta_HIv2___RNAseq.g14330.t3"
## [7] "Pocillopora_acuta_HIv2___RNAseq.g14090.t1"
## [8] "Pocillopora_acuta_HIv2___TS.g30765.t1"
## [9] "Pocillopora_acuta_HIv2___RNAseq.g8006.t1"
## [10] "Pocillopora_acuta_HIv2___RNAseq.g10378.t1"
## [11] "Pocillopora_acuta_HIv2___RNAseq.g14253.t1"
## [12] "Pocillopora_acuta_HIv2___RNAseq.11056_t"
## [13] "Pocillopora_acuta_HIv2___RNAseq.g22261.t1"
## [14] "Pocillopora_acuta_HIv2___RNAseq.g14021.t1"
## [15] "Pocillopora_acuta_HIv2___RNAseq.g19082.t1"
## [16] "Pocillopora_acuta_HIv2___TS.g19991.t2"
## [17] "Pocillopora_acuta_HIv2___RNAseq.g19284.t1"
## [18] "Pocillopora_acuta_HIv2___TS.g4983.t1"
## [19] "Pocillopora_acuta_HIv2___RNAseq.g21000.t1"
## [20] "Pocillopora_acuta_HIv2___RNAseq.g20860.t1"
## [21] "Pocillopora_acuta_HIv2___TS.g9414.t1"
## [22] "Pocillopora_acuta_HIv2___RNAseq.g8588.t1"
## [23] "Pocillopora_acuta_HIv2___RNAseq.g11588.t1"
## [24] "Pocillopora_acuta_HIv2___RNAseq.g12281.t1"
## [25] "Pocillopora_acuta_HIv2___TS.g27642.t1b"
## [26] "Pocillopora_acuta_HIv2___RNAseq.g7803.t1"
## [27] "Pocillopora_acuta_HIv2___RNAseq.g8119.t1"
## [28] "Pocillopora_acuta_HIv2___RNAseq.g17117.t1"
## [29] "Pocillopora_acuta_HIv2___RNAseq.g22978.t3b"
## [30] "Pocillopora_acuta_HIv2___RNAseq.30415_t"
## [31] "Pocillopora_acuta_HIv2___TS.g16008.t2"
## [32] "Pocillopora_acuta_HIv2___RNAseq.g2406.t1"
## [33] "Pocillopora_acuta_HIv2___RNAseq.g25759.t1"
## [34] "Pocillopora_acuta_HIv2___RNAseq.g24120.t1"
## [35] "Pocillopora_acuta_HIv2___RNAseq.g14336.t1"
## [36] "Pocillopora_acuta_HIv2___RNAseq.g9631.t1"
## [37] "Pocillopora_acuta_HIv2___RNAseq.g7627.t1"
## [38] "Pocillopora_acuta_HIv2___RNAseq.g8062.t1"
## [39] "Pocillopora_acuta_HIv2___RNAseq.g1102.t1"
## [40] "Pocillopora_acuta_HIv2___RNAseq.g14484.t1"
## [41] "Pocillopora_acuta_HIv2___RNAseq.g21373.t1"
## [42] "Pocillopora_acuta_HIv2___TS.g16384.t1"
## [43] "Pocillopora_acuta_HIv2___RNAseq.g13561.t1"
## [44] "Pocillopora_acuta_HIv2___RNAseq.g1126.t1"
## [45] "Pocillopora_acuta_HIv2___RNAseq.g9210.t1"
## [46] "Pocillopora_acuta_HIv2___RNAseq.g24649.t1"
## [47] "Pocillopora_acuta_HIv2___RNAseq.g11659.t1"
## [48] "Pocillopora_acuta_HIv2___TS.g25577.t1a"
## [49] "Pocillopora_acuta_HIv2___RNAseq.g27681.t1b"
## [50] "Pocillopora_acuta_HIv2___TS.g1968.t2"
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(rownames(res)[select],(gene_labels$query)),2], fontsize_row = 6)
top50_DE
save_ggplot(top50_DE, "top50_LFC_DE_SwissProt")
#view genes Higher in Aboral, Lower in OralEpi, ordered by log2FoldChange
select <- order(res$log2FoldChange,decreasing = TRUE)[1:50]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
up_Aboral <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(rownames(res)[select],(gene_labels$query)),2], fontsize_row = 5)
up_Aboral
save_ggplot(up_Aboral, "up_Aboral_SwissProt")
#view genes Lower in Aboral, Higher in OralEpi, ordered by log2FoldChange
select <- order(res$log2FoldChange)[1:50]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
up_OralEpi <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row =gene_labels[match(rownames(res)[select],(gene_labels$query)),2], fontsize_row = 5)
up_OralEpi
save_ggplot(up_OralEpi, "up_OralEpi_SwissProt")
DE_05_SwissProt$short_GO <- ifelse(nchar(DE_05_SwissProt$BiologicalProcess) > 30,
paste0(substr(DE_05_SwissProt$BiologicalProcess, 1, 27), "..."),
DE_05_SwissProt$BiologicalProcess)
gene_labels <- DE_05_SwissProt %>% select(query,short_GO) %>%
mutate_all(~ ifelse(is.na(.), "", .)) #replace NAs with "" for labelling purposes
#view most significantly differentially expressed genes
select <- order(res$padj)[1:50]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row =gene_labels[match(rownames(res)[select],(gene_labels$query)),2], fontsize_row = 5)
top50_DE
save_ggplot(top50_DE, "top50_DE_Blast2GO")
#view genes Higher in Aboral, Lower in OralEpi, ordered by log2FoldChange
select <- order(res$log2FoldChange,decreasing = TRUE)[1:50]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
up_Aboral <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row =gene_labels[match(rownames(res)[select],(gene_labels$query)),2], fontsize_row = 5)
up_Aboral
save_ggplot(up_Aboral, "up_Aboral_Blast2GO")
#view genes Lower in Aboral, Higher in OralEpi, ordered by log2FoldChange
select <- order(res$log2FoldChange)[1:50]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
up_OralEpi <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row =gene_labels[match(rownames(res)[select],(gene_labels$query)),2], fontsize_row = 5)
up_OralEpi
save_ggplot(up_OralEpi, "up_OralEpi_Blast2GO")
Manual <- read.csv(paste0(outdir,"/DE_05_Manual_annotation.csv")) %>% arrange(X)
Manual$Heatmap_Label <- gsub("Homeobox protein", "Hox", Manual$Heatmap_Label, ignore.case = TRUE)
Manual$Heatmap_Label <- gsub("Protein Wnt", "Wnt", Manual$Heatmap_Label, ignore.case = TRUE)
gene_labels <- Manual %>%
select(query,Heatmap_Label) %>%
mutate_all(~ ifelse(is.na(.), "", .)) #replace NAs with "" for labelling purposes
#view most significantly differentially expressed genes
select <- order(res$padj)[1:50]
rownames(res)[select]
## [1] "Pocillopora_acuta_HIv2___RNAseq.g6351.t1"
## [2] "Pocillopora_acuta_HIv2___RNAseq.g28575.t1a"
## [3] "Pocillopora_acuta_HIv2___RNAseq.g27038.t1"
## [4] "Pocillopora_acuta_HIv2___RNAseq.g2165.t1"
## [5] "Pocillopora_acuta_HIv2___TS.g18104.t1"
## [6] "Pocillopora_acuta_HIv2___RNAseq.g14330.t3"
## [7] "Pocillopora_acuta_HIv2___RNAseq.g14025.t1"
## [8] "Pocillopora_acuta_HIv2___RNAseq.g14090.t1"
## [9] "Pocillopora_acuta_HIv2___RNAseq.g10378.t1"
## [10] "Pocillopora_acuta_HIv2___RNAseq.g3832.t1"
## [11] "Pocillopora_acuta_HIv2___RNAseq.g8006.t1"
## [12] "Pocillopora_acuta_HIv2___RNAseq.g14253.t1"
## [13] "Pocillopora_acuta_HIv2___RNAseq.g25431.t1"
## [14] "Pocillopora_acuta_HIv2___RNAseq.11056_t"
## [15] "Pocillopora_acuta_HIv2___RNAseq.g9588.t1"
## [16] "Pocillopora_acuta_HIv2___TS.g30765.t1"
## [17] "Pocillopora_acuta_HIv2___RNAseq.g14021.t1"
## [18] "Pocillopora_acuta_HIv2___RNAseq.g19082.t1"
## [19] "Pocillopora_acuta_HIv2___RNAseq.g16202.t1"
## [20] "Pocillopora_acuta_HIv2___TS.g19991.t2"
## [21] "Pocillopora_acuta_HIv2___RNAseq.g20860.t1"
## [22] "Pocillopora_acuta_HIv2___RNAseq.g8588.t1"
## [23] "Pocillopora_acuta_HIv2___RNAseq.g26604.t1"
## [24] "Pocillopora_acuta_HIv2___TS.g18100.t1"
## [25] "Pocillopora_acuta_HIv2___TS.g27642.t1b"
## [26] "Pocillopora_acuta_HIv2___RNAseq.g11588.t1"
## [27] "Pocillopora_acuta_HIv2___RNAseq.g25327.t1"
## [28] "Pocillopora_acuta_HIv2___RNAseq.g12281.t1"
## [29] "Pocillopora_acuta_HIv2___RNAseq.g19085.t1"
## [30] "Pocillopora_acuta_HIv2___RNAseq.g19284.t1"
## [31] "Pocillopora_acuta_HIv2___TS.g11360.t1"
## [32] "Pocillopora_acuta_HIv2___RNAseq.g7803.t1"
## [33] "Pocillopora_acuta_HIv2___TS.g16008.t2"
## [34] "Pocillopora_acuta_HIv2___RNAseq.g22978.t3b"
## [35] "Pocillopora_acuta_HIv2___RNAseq.g2406.t1"
## [36] "Pocillopora_acuta_HIv2___RNAseq.g8119.t1"
## [37] "Pocillopora_acuta_HIv2___TS.g9414.t1"
## [38] "Pocillopora_acuta_HIv2___RNAseq.g21374.t1"
## [39] "Pocillopora_acuta_HIv2___RNAseq.g21000.t1"
## [40] "Pocillopora_acuta_HIv2___RNAseq.g5252.t1"
## [41] "Pocillopora_acuta_HIv2___RNAseq.g22853.t1"
## [42] "Pocillopora_acuta_HIv2___RNAseq.g17117.t1"
## [43] "Pocillopora_acuta_HIv2___RNAseq.g25759.t1"
## [44] "Pocillopora_acuta_HIv2___TS.g16802.t1"
## [45] "Pocillopora_acuta_HIv2___RNAseq.g24120.t1"
## [46] "Pocillopora_acuta_HIv2___RNAseq.30415_t"
## [47] "Pocillopora_acuta_HIv2___RNAseq.g14336.t1"
## [48] "Pocillopora_acuta_HIv2___RNAseq.g26594.t2"
## [49] "Pocillopora_acuta_HIv2___RNAseq.g19115.t1"
## [50] "Pocillopora_acuta_HIv2___TS.g4983.t1"
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(rownames(res)[select],(gene_labels$query)),2], fontsize_row = 6)
top50_DE
save_ggplot(top50_DE, "top50_DE_Manual")
#view most significantly differentially expressed genes by LFC
select <- order(abs(res$log2FoldChange),decreasing = TRUE)[1:50]
rownames(res)[select]
## [1] "Pocillopora_acuta_HIv2___RNAseq.g28575.t1a"
## [2] "Pocillopora_acuta_HIv2___TS.g18104.t1"
## [3] "Pocillopora_acuta_HIv2___RNAseq.g27038.t1"
## [4] "Pocillopora_acuta_HIv2___RNAseq.g14025.t1"
## [5] "Pocillopora_acuta_HIv2___RNAseq.g2165.t1"
## [6] "Pocillopora_acuta_HIv2___RNAseq.g14330.t3"
## [7] "Pocillopora_acuta_HIv2___RNAseq.g14090.t1"
## [8] "Pocillopora_acuta_HIv2___TS.g30765.t1"
## [9] "Pocillopora_acuta_HIv2___RNAseq.g8006.t1"
## [10] "Pocillopora_acuta_HIv2___RNAseq.g10378.t1"
## [11] "Pocillopora_acuta_HIv2___RNAseq.g14253.t1"
## [12] "Pocillopora_acuta_HIv2___RNAseq.11056_t"
## [13] "Pocillopora_acuta_HIv2___RNAseq.g22261.t1"
## [14] "Pocillopora_acuta_HIv2___RNAseq.g14021.t1"
## [15] "Pocillopora_acuta_HIv2___RNAseq.g19082.t1"
## [16] "Pocillopora_acuta_HIv2___TS.g19991.t2"
## [17] "Pocillopora_acuta_HIv2___RNAseq.g19284.t1"
## [18] "Pocillopora_acuta_HIv2___TS.g4983.t1"
## [19] "Pocillopora_acuta_HIv2___RNAseq.g21000.t1"
## [20] "Pocillopora_acuta_HIv2___RNAseq.g20860.t1"
## [21] "Pocillopora_acuta_HIv2___TS.g9414.t1"
## [22] "Pocillopora_acuta_HIv2___RNAseq.g8588.t1"
## [23] "Pocillopora_acuta_HIv2___RNAseq.g11588.t1"
## [24] "Pocillopora_acuta_HIv2___RNAseq.g12281.t1"
## [25] "Pocillopora_acuta_HIv2___TS.g27642.t1b"
## [26] "Pocillopora_acuta_HIv2___RNAseq.g7803.t1"
## [27] "Pocillopora_acuta_HIv2___RNAseq.g8119.t1"
## [28] "Pocillopora_acuta_HIv2___RNAseq.g17117.t1"
## [29] "Pocillopora_acuta_HIv2___RNAseq.g22978.t3b"
## [30] "Pocillopora_acuta_HIv2___RNAseq.30415_t"
## [31] "Pocillopora_acuta_HIv2___TS.g16008.t2"
## [32] "Pocillopora_acuta_HIv2___RNAseq.g2406.t1"
## [33] "Pocillopora_acuta_HIv2___RNAseq.g25759.t1"
## [34] "Pocillopora_acuta_HIv2___RNAseq.g24120.t1"
## [35] "Pocillopora_acuta_HIv2___RNAseq.g14336.t1"
## [36] "Pocillopora_acuta_HIv2___RNAseq.g9631.t1"
## [37] "Pocillopora_acuta_HIv2___RNAseq.g7627.t1"
## [38] "Pocillopora_acuta_HIv2___RNAseq.g8062.t1"
## [39] "Pocillopora_acuta_HIv2___RNAseq.g1102.t1"
## [40] "Pocillopora_acuta_HIv2___RNAseq.g14484.t1"
## [41] "Pocillopora_acuta_HIv2___RNAseq.g21373.t1"
## [42] "Pocillopora_acuta_HIv2___TS.g16384.t1"
## [43] "Pocillopora_acuta_HIv2___RNAseq.g13561.t1"
## [44] "Pocillopora_acuta_HIv2___RNAseq.g1126.t1"
## [45] "Pocillopora_acuta_HIv2___RNAseq.g9210.t1"
## [46] "Pocillopora_acuta_HIv2___RNAseq.g24649.t1"
## [47] "Pocillopora_acuta_HIv2___RNAseq.g11659.t1"
## [48] "Pocillopora_acuta_HIv2___TS.g25577.t1a"
## [49] "Pocillopora_acuta_HIv2___RNAseq.g27681.t1b"
## [50] "Pocillopora_acuta_HIv2___TS.g1968.t2"
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(rownames(res)[select],(gene_labels$query)),2], fontsize_row = 6)
top50_DE
save_ggplot(top50_DE, "top50_LFC_DE_Manual")
#view genes Higher in Aboral, Lower in OralEpi, ordered by log2FoldChange
select <- order(res$log2FoldChange,decreasing = TRUE)[1:50]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
up_Aboral <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), labels_col = NA, annotation_colors = ann_colors,
labels_row =gene_labels[match(rownames(res)[select],(gene_labels$query)),2], fontsize_row = 8.5)
up_Aboral
save_ggplot(up_Aboral, "up_Aboral_Manual")
#view genes Lower in Aboral, Higher in OralEpi, ordered by log2FoldChange
select <- order(res$log2FoldChange)[1:50]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
up_OralEpi <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), labels_col = NA, annotation_colors = ann_colors,
labels_row =gene_labels[match(rownames(res)[select],(gene_labels$query)),2], fontsize_row = 8.5)
up_OralEpi
save_ggplot(up_OralEpi, "up_OralEpi_Manual")
cd ../references
#download the genome protein fasta if you have not already
wget http://cyanophora.rutgers.edu/Pocillopora_acuta/Pocillopora_acuta_HIv2.genes.pep.faa.gz
#unzip file
gunzip Pocillopora_acuta_HIv2.genes.pep.faa.gz
In unity
salloc -p cpu -c 8 --mem 32G
module load uri/main
module load BLAST+/2.15.0-gompi-2023a
#make blast_dbs directory if you haven't done so above
mkdir blast_dbs
cd blast_dbs
makeblastdb -in ../Pocillopora_acuta_HIv2.genes.pep.faa -out Pacuta_prot -dbtype prot
cd ../../output_RNA/differential_expression/
#make blast output directory if you haven't done so above
mkdir blast
cd blast
nano YinYang.txt
add accession numbers of interest:
XP_048585772.1
XP_048585773.1
XP_048585774.1
NP_996806.2
NP_777560.2
# Read the input file line by line and fetch FASTA sequences
while read -r accession; do
if [[ -n "$accession" ]]; then
echo "Fetching $accession..."
curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=${accession}&rettype=fasta&retmode=text" >> "YinYang.fasta"
echo >> "YinYang.fasta" # Add a newline between sequences
sleep 1 # Avoid hitting rate limits
fi
done < "YinYang.txt"
# run blast with human-readable output
blastp -query YinYang.fasta -db ../../../references/blast_dbs/Pacuta_prot -out YinYang_blast_results.txt -outfmt 0
#looks like there are a lot of matches for each gene! I am going to do a tab search with a very low e-value cutoff:
# run blast with tabular output
blastp -query YinYang.fasta -db ../../../references/blast_dbs/Pacuta_prot -out YinYang_blast_results_tab.txt -outfmt 6 -evalue 1e-25
Great! Interestingly, the same gene, Pocillopora_acuta_HIv2_RNAseq.g25242.t1 is the top match for all 5 proteins I searched. Pocillopora_acuta_HIv2_TS.g24434.t1 is the second best match for all 5. That is interesting! Pocillopora_acuta_HIv2_RNAseq.g7583.t1 is also worth looking at. And Pocillopora_acuta_HIv2_TS.g21338.t1 matched all three YY1 isoforms.
YinYangs <- c("Pocillopora_acuta_HIv2___RNAseq.g25242.t1",
"Pocillopora_acuta_HIv2___TS.g24434.t1",
"Pocillopora_acuta_HIv2___RNAseq.g7583.t1",
"Pocillopora_acuta_HIv2___TS.g21338.t1")
for (i in 1:length(YinYangs)){
plotCounts(dds, gene=YinYangs[i], intgroup=c("Tissue"),)
}
as.data.frame(resOrdered)[YinYangs,]
## baseMean log2FoldChange lfcSE
## Pocillopora_acuta_HIv2___RNAseq.g25242.t1 2388.43282 -0.65267532 0.8125595
## Pocillopora_acuta_HIv2___TS.g24434.t1 86.14232 -0.16148954 1.0080043
## Pocillopora_acuta_HIv2___RNAseq.g7583.t1 429.08727 0.06121494 0.9629085
## Pocillopora_acuta_HIv2___TS.g21338.t1 277.12637 -0.03463576 1.0095974
## pvalue padj
## Pocillopora_acuta_HIv2___RNAseq.g25242.t1 0.1695296 0.3778825
## Pocillopora_acuta_HIv2___TS.g24434.t1 0.2933826 0.5402220
## Pocillopora_acuta_HIv2___RNAseq.g7583.t1 0.8212358 0.9266210
## Pocillopora_acuta_HIv2___TS.g21338.t1 0.2453329 0.4838418
## query
## Pocillopora_acuta_HIv2___RNAseq.g25242.t1 Pocillopora_acuta_HIv2___RNAseq.g25242.t1
## Pocillopora_acuta_HIv2___TS.g24434.t1 Pocillopora_acuta_HIv2___TS.g24434.t1
## Pocillopora_acuta_HIv2___RNAseq.g7583.t1 Pocillopora_acuta_HIv2___RNAseq.g7583.t1
## Pocillopora_acuta_HIv2___TS.g21338.t1 Pocillopora_acuta_HIv2___TS.g21338.t1
Okay! None of these genes are differentially expressed between the tissues. That is interesting and good to know. Pocillopora_acuta_HIv2___RNAseq.g25242.t1 has the highest basal expression of all the potential isoforms of this transcription factor.
DE_05_SwissProt$short_name <- ifelse(nchar(DE_05_SwissProt$ProteinNames) > 80,
paste0(substr(DE_05_SwissProt$ProteinNames, 1, 77), "..."),
DE_05_SwissProt$ProteinNames)
TRP <- Manual %>% filter(grepl("transient", ProteinNames, ignore.case = TRUE))
select <- TRP$query
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(select,(gene_labels$query)),2], fontsize_row = 12)
top50_DE
save_ggplot(top50_DE, "TRP_DE_SwissProt", width = 6.43, height = 4.25, units = "in", dpi = 300)
annot_tab$short_name <- ifelse(nchar(annot_tab$ProteinNames) > 80,
paste0(substr(annot_tab$ProteinNames, 1, 77), "..."),
annot_tab$ProteinNames)
swissprot_labels <- annot_tab %>%
select(query,short_name) %>%
mutate_all(~ ifelse(is.na(.), "", .)) #replace NAs with "" for labelling purposes
TRP <- annot_tab %>% filter(grepl("transient", ProteinNames, ignore.case = TRUE))
select1 <- TRP$query
select <- match(select1,rownames(vsd))
select <- select[!is.na(select)]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = swissprot_labels[match(select1,swissprot_labels$query),2], fontsize_row = 6)
top50_DE
save_ggplot(top50_DE, "TRP_SwissProt")
DE_05_biomin_filtered <- DE_05_biomin %>% left_join(Manual,by="query" ) #%>% filter(!(Heatmap_Label %in% c("Myosin-9", "Actin, cytoplasmic")))
select <- DE_05_biomin_filtered$query
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
DE_biomin <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(select,(gene_labels$query)),2], fontsize_row = 8, cutree_rows = 2)
DE_biomin
save_ggplot(DE_biomin, "DE_biomin")
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k as needed
# Create a dataframe to manage clusters and reordering
clustered_data <- data.frame(
query = select,
Heatmap_Label = gene_labels[match(select, gene_labels$query), 2],
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Generate heatmap with reordered rows and labels
DE_biomin <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
show_rownames = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
fontsize_row = 8
)
save_ggplot(DE_biomin, "clusters_clean/DE_biomin")
#############################
DE_05_Biomin_broc_filtered <- DE_05_Biomin_broc %>% left_join(Manual,by="query" ) %>% filter(!(Heatmap_Label %in% c("Myosin-9", "Actin, cytoplasmic")))
select <- DE_05_Biomin_broc_filtered$query
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
DE_Biomin_broc <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(select,(gene_labels$query)),2], fontsize_row = 8, cutree_rows = 2)
DE_Biomin_broc
save_ggplot(DE_Biomin_broc, "DE_Biomin_broc")
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k as needed
# Create a dataframe to manage clusters and reordering
clustered_data <- data.frame(
query = select,
Heatmap_Label = gene_labels[match(select, gene_labels$query), 2],
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Generate heatmap with reordered rows and labels
DE_Biomin_broc <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
show_rownames = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
fontsize_row = 8
)
save_ggplot(DE_Biomin_broc, "clusters_clean/DE_Biomin_broc")
vst_counts <- assay(vsd)["Pocillopora_acuta_HIv2___RNAseq.g15280.t1", ]
df <- as.data.frame(colData(dds))
df$expression <- vst_counts
library(ggplot2)
ggplot(df, aes(x=Tissue, y=expression, group=Fragment, color=Fragment)) +
geom_point(size=3) +
geom_line() +
theme_minimal() +
labs(title="VST Expression by Tissue", y="VST normalized expression")
WNT <- Manual %>% filter(grepl("wnt", Heatmap_Label, ignore.case = TRUE)|grepl("frizzle", Heatmap_Label, ignore.case = TRUE)|grepl("homeobox", Heatmap_Label, ignore.case = TRUE)|grepl("hox", Heatmap_Label, ignore.case = TRUE))#|grepl("wnt", BiologicalProcess, ignore.case = TRUE))
select <- WNT$query
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(select,(gene_labels$query)),2], fontsize_row = 8, cutree_rows = 2)
top50_DE
save_ggplot(top50_DE, "WNT_Hox_DE_SwissProt")
# Perform clustering
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k for the number of clusters
# Create a dataframe for clustering and label management
clustered_data <- data.frame(
query = select,
Heatmap_Label = gene_labels[match(select, gene_labels$query), 2],
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Ensure there is only 1 gap between clusters
#row_breaks <- which(cluster_assignments != cluster_assignments[1]) # Only where cluster switches
# Generate the heatmap with reordered rows and labels
top50_DE <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
fontsize_row = 8,
borders_color = "white", # Optional border around clusters
gaps_row = 26 # Add breaks only between clusters
)
# Save the heatmap
save_ggplot(top50_DE, "clusters_clean/WNT_Hox_DE_SwissProt")
WNT_all <- Manual %>% filter(grepl("wnt", Heatmap_Label, ignore.case = TRUE)|grepl("frizzle", Heatmap_Label, ignore.case = TRUE)|grepl("homeobox", Heatmap_Label, ignore.case = TRUE)|grepl("hox", Heatmap_Label, ignore.case = TRUE))#|grepl("wnt", BiologicalProcess, ignore.case = TRUE))
WNT <- Manual %>% filter(grepl("wnt", Heatmap_Label, ignore.case = TRUE))
FRZ <- Manual %>% filter(grepl("frizzle", Heatmap_Label, ignore.case = TRUE))
HOX <- Manual %>% filter(grepl("hox", Heatmap_Label, ignore.case = TRUE)|grepl("homeobox", Heatmap_Label, ignore.case = TRUE))
select <- WNT$query
gene_layout <- gff_transcripts %>%
filter(query %in% select) %>%
arrange(chromosome, start) %>% left_join(WNT) %>% distinct()
expr_df <- assay(vsd)[select, ] %>%
as.data.frame() %>%
rownames_to_column("query") %>%
pivot_longer(-query, names_to = "Sample", values_to = "vsd_expr") %>%
left_join(as.data.frame(colData(vsd)), by = "Sample") %>%
group_by(query, Tissue) %>%
summarise(mean_expr = mean(vsd_expr), .groups = "drop") %>%
pivot_wider(names_from = Tissue, values_from = mean_expr)
plot_df <- gene_layout %>%
left_join(expr_df, by = "query")
plot_df <- plot_df %>%
mutate(highest_expr_tissue = case_when(
OralEpi > Aboral ~ "OralEpi",
Aboral > OralEpi ~ "Aboral",
TRUE ~ "equal"
))
plot_df <- plot_df %>%
mutate(chromosome_split = case_when(
chromosome == "Pocillopora_acuta_HIv2___xfSc0000014" & start < 3.6e6 ~ "xfSc0000014_A",
chromosome == "Pocillopora_acuta_HIv2___xfSc0000014" & start >= 3.65e6 ~ "xfSc0000014_B",
TRUE ~ chromosome
))
plot_df$Heatmap_Label <- gsub("Protein ","",plot_df$Heatmap_Label)
heat_df <- plot_df %>%
select(query, start, end, width,chromosome, OralEpi, Aboral) %>%
pivot_longer(cols = c(OralEpi, Aboral), names_to = "tissue", values_to = "expression") %>%
mutate(midpoint = (start + end) / 2) # for positioning tile center
heat_df <- heat_df %>%
left_join(plot_df %>% select(query, chromosome_split), by = "query") %>%
mutate(chromosome = chromosome_split)
library(gggenes)
library(ggnewscale)
Wnt_chr <- ggplot(plot_df, aes(xmin = start, xmax = end, y = "Hox genes", forward = strand == "+",
fill = highest_expr_tissue)) +
geom_gene_arrow(arrowhead_height = unit(3, "mm"), arrowhead_width = unit(1.5, "mm")) +
geom_label(data = plot_df,
aes(x = (start + end)/2, y = "_Genes_ ", label = Heatmap_Label),
vjust = -1.75, size = 2, fill = "lightgrey", label.size = 0.1,fontface="bold")+
scale_fill_manual(values = c("OralEpi" = "palegreen3", "Aboral" = "mediumpurple1", "equal" = "grey")) +
theme_minimal() +
facet_wrap(~ chromosome_split, scales = "free_x")+
theme(axis.text.y = element_blank(),
axis.title.y = element_blank(),
panel.spacing = unit(2, "lines")) +
labs(x = "Genomic position", fill = "Higher expression in")
Wnt_chr
save_ggplot(Wnt_chr, "Wnt_chromosome")
Wnt_chr_heatmap <- ggplot() +
# Heatmap tiles
geom_tile(data = heat_df,
aes(x = midpoint, y = tissue, fill = expression,width = width),colour="black",
height = 0.5) +
# Color for heatmap (expression level)
scale_fill_gradient(low = "white", high = "firebrick", name = "Expression") +
new_scale_fill() +
# Gene arrows
geom_gene_arrow(data = plot_df,
aes(xmin = start, xmax = end, y = "_Genes_",
forward = strand == "+", fill = highest_expr_tissue),
arrowhead_height = unit(3, "mm"),
arrowhead_width = unit(1.5, "mm")) +
geom_label(data = plot_df,
aes(x = (start + end)/2, y = "_Genes_", label = Heatmap_Label),
vjust = 1.5, size = 4, fill = "lightgrey", label.size = 0.1,fontface="bold")+
scale_fill_manual(values = c("OralEpi" = "palegreen3", "Aboral" = "mediumpurple1", "equal" = "grey"),
name = "Higher expression in") +
facet_wrap(~ chromosome_split, scales = "free_x") +
theme_minimal() +
theme(
axis.text.y = element_text(size = 10),
axis.title.y = element_blank(),
panel.spacing = unit(.1, "lines")
) +scale_y_discrete(expand = expansion(mult = c(1, 0)))+
labs(x = "Genomic position")
save_ggplot(Wnt_chr_heatmap, "Wnt_chromosome_expression",width = 20, height = 5)
# Shift gene coordinates within each chromosome
plot_df_shifted <- plot_df %>%
group_by(chromosome) %>%
mutate(
chr_start = min(start),
start_shifted = start - chr_start + 10, # +10 if you want to avoid 0
end_shifted = end - chr_start + 10
) %>%
ungroup()
heat_df_shifted <- plot_df_shifted %>%
select(query, start_shifted, end_shifted, width,chromosome, OralEpi, Aboral) %>%
pivot_longer(cols = c(OralEpi, Aboral), names_to = "tissue", values_to = "expression") %>%
mutate(midpoint = (start_shifted + end_shifted) / 2) # for positioning tile center
Wnt_chr_heatmap <- ggplot() +
# Heatmap tiles
geom_tile(data = heat_df_shifted,
aes(x = midpoint, y = tissue, fill = expression,width = width),colour="black",
height = 0.5) +
# Color for heatmap (expression level)
scale_fill_gradient(low = "white", high = "firebrick", name = "Expression") +
new_scale_fill() +
# Gene arrows
geom_gene_arrow(data = plot_df_shifted,
aes(xmin = start_shifted, xmax = end_shifted, y = "_Genes_",
forward = strand == "+", fill = highest_expr_tissue),
arrowhead_height = unit(3, "mm"),
arrowhead_width = unit(1.5, "mm")) +
geom_label(data = plot_df_shifted,
aes(x = (start_shifted + end_shifted)/2, y = "_Genes_", label = Heatmap_Label),
vjust = 1.5, size = 4, fill = "lightgrey", label.size = 0.1,fontface="bold")+
scale_fill_manual(values = c("OralEpi" = "palegreen3", "Aboral" = "mediumpurple1", "equal" = "grey"),
name = "Higher expression in") +
facet_wrap(~ chromosome, scales = "fixed",ncol=1) +
theme_minimal() +
theme(
axis.text.y = element_text(size = 10),
axis.title.y = element_blank(),
panel.spacing = unit(.1, "lines")
) +scale_y_discrete(expand = expansion(mult = c(1, 0)))+
labs(x = "Genomic position")
save_ggplot(Wnt_chr_heatmap, "Wnt_chromosome_expression_length_preserved",width = 30, height = 5)
HOX <- Manual %>% filter(grepl("homeobox", Heatmap_Label, ignore.case = TRUE)|grepl("hox", Heatmap_Label, ignore.case = TRUE))
select <- HOX$query
gene_layout <- gff_transcripts %>%
filter(query %in% select) %>%
arrange(chromosome, start) %>% left_join(HOX) %>% distinct()
expr_df <- assay(vsd)[select, ] %>%
as.data.frame() %>%
rownames_to_column("query") %>%
pivot_longer(-query, names_to = "Sample", values_to = "vsd_expr") %>%
left_join(as.data.frame(colData(vsd)), by = "Sample") %>%
group_by(query, Tissue) %>%
summarise(mean_expr = mean(vsd_expr), .groups = "drop") %>%
pivot_wider(names_from = Tissue, values_from = mean_expr)
plot_df <- gene_layout %>%
left_join(expr_df, by = "query")
plot_df <- plot_df %>%
mutate(highest_expr_tissue = case_when(
OralEpi > Aboral ~ "OralEpi",
Aboral > OralEpi ~ "Aboral",
TRUE ~ "equal"
))
plot_df <- plot_df %>%
mutate(chromosome_split = case_when(
chromosome == "Pocillopora_acuta_HIv2___xfSc0000002" & start < 2.2e6 ~ "xfSc0000002_A",
chromosome == "Pocillopora_acuta_HIv2___xfSc0000002" & start >= 6.4e6 ~ "xfSc0000002_B",
chromosome == "Pocillopora_acuta_HIv2___Sc0000011" & start < 2e6 ~ "Sc0000011_A",
chromosome == "Pocillopora_acuta_HIv2___Sc0000011" & start >= 2e6 ~ "Sc0000011_B",
chromosome == "Pocillopora_acuta_HIv2___Sc0000024" & start < 420000 ~ "Sc0000024_A",
chromosome == "Pocillopora_acuta_HIv2___Sc0000024" & start >= 2300000 ~ "Sc0000024_B",
chromosome == "Pocillopora_acuta_HIv2___xfSc0000008" & start < 2e6 ~ "xfSc0000008_A",
chromosome == "Pocillopora_acuta_HIv2___xfSc0000008" & start >= 2e6 ~ "xfSc0000008_B",
TRUE ~ chromosome
))
heat_df <- plot_df %>%
select(query, start, end, width,chromosome, OralEpi, Aboral) %>%
pivot_longer(cols = c(OralEpi, Aboral), names_to = "tissue", values_to = "expression") %>%
mutate(midpoint = (start + end) / 2) # for positioning tile center
heat_df <- heat_df %>%
left_join(plot_df %>% select(query, chromosome_split), by = "query") %>%
mutate(chromosome = chromosome_split)
library(gggenes)
library(ggnewscale)
HOX_chr <- ggplot(plot_df[1:6,], aes(xmin = start, xmax = end, y = "Hox genes", forward = strand == "+",
fill = highest_expr_tissue)) +
geom_gene_arrow(arrowhead_height = unit(3, "mm"), arrowhead_width = unit(1.5, "mm")) +
geom_label(aes(x = (start + end)/2, y = "_Genes_ ", label = Heatmap_Label),
vjust = -1.75, size = 2, fill = "lightgrey", label.size = 0.1,fontface="bold")+
scale_fill_manual(values = c("OralEpi" = "palegreen3", "Aboral" = "mediumpurple1", "equal" = "grey")) +
theme_minimal() +
facet_wrap(~ chromosome_split, scales = "free_x",ncol=1)+
theme(axis.text.y = element_blank(),
axis.title.y = element_blank(),
panel.spacing = unit(2, "lines")) +
labs(x = "Genomic position", fill = "Higher expression in")
HOX_chr
save_ggplot(HOX_chr, "Hox_chromosome")
HOX_chr <- ggplot(plot_df[7:12,], aes(xmin = start, xmax = end, y = "Hox genes", forward = strand == "+",
fill = highest_expr_tissue)) +
geom_gene_arrow(arrowhead_height = unit(3, "mm"), arrowhead_width = unit(1.5, "mm")) +
geom_label(aes(x = (start + end)/2, y = "_Genes_ ", label = Heatmap_Label),
vjust = -1.75, size = 2, fill = "lightgrey", label.size = 0.1,fontface="bold")+
scale_fill_manual(values = c("OralEpi" = "palegreen3", "Aboral" = "mediumpurple1", "equal" = "grey")) +
theme_minimal() +
facet_wrap(~ chromosome_split, scales = "free_x",ncol=1)+
theme(axis.text.y = element_blank(),
axis.title.y = element_blank(),
panel.spacing = unit(2, "lines")) +
labs(x = "Genomic position", fill = "Higher expression in")
HOX_chr
save_ggplot(HOX_chr, "Hox_chromosome2")
HOX_chr <- ggplot(plot_df[13:19,], aes(xmin = start, xmax = end, y = "Hox genes", forward = strand == "+",
fill = highest_expr_tissue)) +
geom_gene_arrow(arrowhead_height = unit(3, "mm"), arrowhead_width = unit(1.5, "mm")) +
geom_label(aes(x = (start + end)/2, y = "_Genes_ ", label = Heatmap_Label),
vjust = -1.75, size = 2, fill = "lightgrey", label.size = 0.1,fontface="bold")+
scale_fill_manual(values = c("OralEpi" = "palegreen3", "Aboral" = "mediumpurple1", "equal" = "grey")) +
theme_minimal() +
facet_wrap(~ chromosome_split, scales = "free_x",ncol=1)+
theme(axis.text.y = element_blank(),
axis.title.y = element_blank(),
panel.spacing = unit(2, "lines")) +
labs(x = "Genomic position", fill = "Higher expression in")
HOX_chr
save_ggplot(HOX_chr, "Hox_chromosome3")
HOX_chr <- ggplot(plot_df[20:24,], aes(xmin = start, xmax = end, y = "Hox genes", forward = strand == "+",
fill = highest_expr_tissue)) +
geom_gene_arrow(arrowhead_height = unit(3, "mm"), arrowhead_width = unit(1.5, "mm")) +
geom_label(aes(x = (start + end)/2, y = "_Genes_ ", label = Heatmap_Label),
vjust = -1.75, size = 2, fill = "lightgrey", label.size = 0.1,fontface="bold")+
scale_fill_manual(values = c("OralEpi" = "palegreen3", "Aboral" = "mediumpurple1", "equal" = "grey")) +
theme_minimal() +
facet_wrap(~ chromosome_split, scales = "free_x",ncol=1)+
theme(axis.text.y = element_blank(),
axis.title.y = element_blank(),
panel.spacing = unit(2, "lines")) +
labs(x = "Genomic position", fill = "Higher expression in")
HOX_chr
save_ggplot(HOX_chr, "Hox_chromosome4")
HOX_chr_heatmap <- ggplot() +
# Heatmap tiles
geom_tile(data = heat_df,
aes(x = midpoint, y = tissue, fill = expression,width = width),colour="black",
height = 0.5) +
# Color for heatmap (expression level)
scale_fill_gradient(low = "white", high = "firebrick", name = "Expression") +
new_scale_fill() +
# Gene arrows
geom_gene_arrow(data = plot_df,
aes(xmin = start, xmax = end, y = "_Genes_",
forward = strand == "+", fill = highest_expr_tissue),
arrowhead_height = unit(3, "mm"),
arrowhead_width = unit(1.5, "mm")) +
geom_label(data = plot_df,
aes(x = (start + end)/2, y = "_Genes_", label = Heatmap_Label),
vjust = 1.5, size = 2 , fill = "lightgrey", label.size = 0.1,fontface="bold")+
scale_fill_manual(values = c("OralEpi" = "palegreen3", "Aboral" = "mediumpurple1", "equal" = "grey"),
name = "Higher expression in") +
facet_wrap(~ chromosome_split, scales = "free_x") +
theme_minimal() +
theme(
axis.text.y = element_text(size = 5),
axis.title.y = element_blank(),
panel.spacing = unit(.1, "lines")
) +#scale_y_discrete(expand = expansion(mult = c(1, 0)))+
labs(x = "Genomic position")
save_ggplot(HOX_chr_heatmap, "Hox_chromosome_expression",width = 20, height = 5)
# Shift gene coordinates within each chromosome
plot_df_shifted <- plot_df %>%
group_by(chromosome_split) %>%
mutate(
chr_start = min(start),
start_shifted = start - chr_start + 10, # +10 if you want to avoid 0
end_shifted = end - chr_start + 10
) %>%
ungroup()
heat_df_shifted <- plot_df_shifted %>%
select(query, start_shifted, end_shifted, width,chromosome_split, OralEpi, Aboral) %>%
pivot_longer(cols = c(OralEpi, Aboral), names_to = "tissue", values_to = "expression") %>%
mutate(midpoint = (start_shifted + end_shifted) / 2) # for positioning tile center
HOX_chr_heatmap <- ggplot() +
# Heatmap tiles
geom_tile(data = heat_df_shifted,
aes(x = midpoint, y = tissue, fill = expression,width = width),colour="black",
height = 1) +
# Color for heatmap (expression level)
scale_fill_gradient(low = "white", high = "firebrick", name = "Expression") +
new_scale_fill() +
# Gene arrows
geom_gene_arrow(data = plot_df_shifted,
aes(xmin = start_shifted, xmax = end_shifted, y = "_Genes_",
forward = strand == "+", fill = highest_expr_tissue),
arrowhead_height = unit(3, "mm"),
arrowhead_width = unit(1.5, "mm")) +
geom_label(data = plot_df_shifted,
aes(x = (start_shifted + end_shifted)/2, y = "_Genes_", label = Heatmap_Label),
vjust = 1.5, size = 4, fill = "lightgrey", label.size = 0.1,fontface="bold")+
scale_fill_manual(values = c("OralEpi" = "palegreen3", "Aboral" = "mediumpurple1", "equal" = "grey"),
name = "Higher expression in") +
facet_wrap(~ chromosome_split, scales = "fixed",ncol=1) +
theme_minimal() +
theme(
axis.text.y = element_text(size = 10),
axis.title.y = element_blank(),
panel.spacing = unit(.1, "lines")
) +scale_y_discrete(expand = expansion(mult = c(1, 0)))+
labs(x = "Genomic position")
save_ggplot_big <- function(plot, filename, width = 10, height = 7, units = "in", dpi = 300) {
png_path <- file.path(outdir, paste0(filename, ".png"))
pdf_dir <- file.path(outdir, "pdf_figs")
pdf_path <- file.path(pdf_dir, paste0(filename, ".pdf"))
# Ensure the pdf_figs directory exists
if (!dir.exists(pdf_dir)) dir.create(pdf_dir, recursive = TRUE)
# Save plots
ggsave(filename = png_path, plot = plot, width = width, height = height, units = units, dpi = dpi,limitsize = FALSE)
ggsave(filename = pdf_path, plot = plot, width = width, height = height, units = units, dpi = dpi,limitsize = FALSE)
}
save_ggplot_big(HOX_chr_heatmap, "Hox_chromosome_expression_length_preserved",width = 15, height = 25)
select <- unique(DE_05_He_etal$query)
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(select,(gene_labels$query)),2], fontsize_row = 8, cutree_rows = 2)
top50_DE
save_ggplot(top50_DE, "He_etal_Nvec_DE_SwissProt")
# Perform clustering
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k for the number of clusters
# Create a dataframe for clustering and label management
clustered_data <- data.frame(
query = select,
Heatmap_Label = gene_labels[match(select, gene_labels$query), 2],
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Ensure there is only 1 gap between clusters
#row_breaks <- which(cluster_assignments != cluster_assignments[1]) # Only where cluster switches
# Generate the heatmap with reordered rows and labels
top50_DE <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
fontsize_row = 8,
borders_color = "white", # Optional border around clusters
gaps_row = 4 # Add breaks only between clusters
)
# Save the heatmap
save_ggplot(top50_DE, "clusters_clean/He_etal_Nvec_DE_SwissProt")
#### all
labels <- DESeq_He_etal$def_short
select <- DESeq_He_etal$query
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = labels, fontsize_row = 8, cutree_rows = 2)
top50_DE
save_ggplot(top50_DE, "He_etal_Nvec_all_SwissProt")
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k for the number of clusters
# Create a dataframe for clustering and label management
clustered_data <- data.frame(
query = select,
Heatmap_Label = labels,
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Ensure there is only 1 gap between clusters
#row_breaks <- which(cluster_assignments != cluster_assignments[1]) # Only where cluster switches
# Generate the heatmap with reordered rows and labels
top50_DE <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
fontsize_row = 8,
borders_color = "white", # Optional border around clusters
gaps_row = 9 # Add breaks only between clusters
)
# Save the heatmap
save_ggplot(top50_DE, "clusters_clean/He_etal_Nvec_all_SwissProt")
gene_layout <- gff_transcripts %>%
filter(query %in% select) %>%
arrange(chromosome, start) %>% left_join(DESeq_He_etal) %>% distinct()
expr_df <- assay(vsd)[select, ] %>%
as.data.frame() %>%
rownames_to_column("query") %>%
pivot_longer(-query, names_to = "Sample", values_to = "vsd_expr") %>%
left_join(as.data.frame(colData(vsd)), by = "Sample") %>%
group_by(query, Tissue) %>%
summarise(mean_expr = mean(vsd_expr), .groups = "drop") %>%
pivot_wider(names_from = Tissue, values_from = mean_expr)
plot_df <- gene_layout %>%
left_join(expr_df, by = "query")
plot_df <- plot_df %>%
mutate(highest_expr_tissue = case_when(
OralEpi > Aboral ~ "OralEpi",
Aboral > OralEpi ~ "Aboral",
TRUE ~ "equal"
))
heat_df <- plot_df %>%
select(query, start, end, width,chromosome, OralEpi, Aboral) %>%
pivot_longer(cols = c(OralEpi, Aboral), names_to = "tissue", values_to = "expression") %>%
mutate(midpoint = (start + end) / 2) # for positioning tile center
library(gggenes)
library(ggnewscale)
Hox_chr <- ggplot(plot_df, aes(xmin = start, xmax = end, y = "Hox genes", forward = strand == "+",
fill = highest_expr_tissue)) +
geom_gene_arrow(arrowhead_height = unit(3, "mm"), arrowhead_width = unit(1.5, "mm")) +
geom_gene_label(aes(label = Gene_Name), size = 50) +
scale_fill_manual(values = c("OralEpi" = "palegreen3", "Aboral" = "mediumpurple1", "equal" = "grey")) +
theme_minimal() +
facet_wrap(~ chromosome, scales = "free_x", ncol = 1)+
theme(axis.text.y = element_blank(),
axis.title.y = element_blank(),
panel.spacing = unit(.1, "lines")) +
labs(x = "Genomic position", fill = "Higher expression in")
Hox_chr_heatmap <- ggplot() +
# Heatmap tiles
geom_tile(data = heat_df,
aes(x = midpoint, y = tissue, fill = expression),colour="black",
height = 0.4) +
# Color for heatmap (expression level)
scale_fill_gradient(low = "white", high = "firebrick", name = "Expression") +
new_scale_fill() +
# Gene arrows
geom_gene_arrow(data = plot_df,
aes(xmin = start, xmax = end, y = "_Genes_",
forward = strand == "+", fill = highest_expr_tissue),
arrowhead_height = unit(3, "mm"),
arrowhead_width = unit(1.5, "mm")) +
geom_gene_label(data = plot_df,
aes(xmin = start, xmax = end, y = "_Genes_", label = Gene_Name),
size = 50) +
scale_fill_manual(values = c("OralEpi" = "palegreen3", "Aboral" = "mediumpurple1", "equal" = "grey"),
name = "Higher expression in") +
facet_wrap(~ chromosome, scales = "free_x", ncol = 1) +
theme_minimal() +
theme(
axis.text.y = element_text(size = 10),
axis.title.y = element_blank(),
panel.spacing = unit(.1, "lines")
) +
labs(x = "Genomic position")
save_ggplot_big(Hox_chr, "He_etal_Nvec_all_SwissProt_chromosome", width = 20, height = 40)
save_ggplot_big(Hox_chr_heatmap, "He_etal_Nvec_all_SwissProt_chromosome_expression", width = 20, height = 40)
select <- unique(DE_05_DuBuc_etal$query)
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(select,(gene_labels$query)),2], fontsize_row = 8, cutree_rows = 2)
top50_DE
save_ggplot(top50_DE, "DuBuc_etal_Nvec_DE_SwissProt")
# Perform clustering
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k for the number of clusters
# Create a dataframe for clustering and label management
clustered_data <- data.frame(
query = select,
Heatmap_Label = gene_labels[match(select, gene_labels$query), 2],
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Ensure there is only 1 gap between clusters
#row_breaks <- which(cluster_assignments != cluster_assignments[1]) # Only where cluster switches
# Generate the heatmap with reordered rows and labels
top50_DE <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
fontsize_row = 8,
borders_color = "white"
)
# Save the heatmap
save_ggplot(top50_DE, "clusters_clean/DuBuc_etal_Nvec_DE_SwissProt")
DESeq_He_etal$source <- "He_etal_2023"
DESeq_DuBuc_etal$source <- "DuBuc_etal_2018"
Hox_Literature <- rbind(DESeq_He_etal,DESeq_DuBuc_etal)
DESeq_Hox_Literature_collapsed <- Hox_Literature %>%
group_by(query, baseMean, log2FoldChange, lfcSE, pvalue, padj) %>%
summarize(
Gene_Name = paste(unique(Gene_Name), collapse = "; "),
Description = paste(unique(Description), collapse = "; "),
def_short = paste(unique(def_short), collapse = "; "),
source = paste(unique(source), collapse = "; "),
.groups = "drop"
)
DE_05_Hox_Literature_collapsed <- DESeq_Hox_Literature_collapsed %>% filter(padj<0.05)
WNT_HOX_MAN <- Manual %>% select(-X) %>% filter(grepl("wnt", Heatmap_Label, ignore.case = TRUE)|grepl("frizzle", Heatmap_Label, ignore.case = TRUE)|grepl("homeobox", Heatmap_Label, ignore.case = TRUE)|grepl("hox", Heatmap_Label, ignore.case = TRUE)) %>% select(-c(blast_hit,evalue,ProteinNames,BiologicalProcess,GeneOntologyIDs))# %>% rename(Description = Heatmap_Label)
COMBINED_HOX_WNT_all <- full_join(DESeq_Hox_Literature_collapsed,WNT_HOX_MAN,by=c("query")) %>%
mutate(
baseMean = coalesce(baseMean.x, baseMean.y),
log2FoldChange = coalesce(log2FoldChange.x, log2FoldChange.y),
lfcSE = coalesce(lfcSE.x, lfcSE.y),
pvalue = coalesce(pvalue.x, pvalue.y),
padj = coalesce(padj.x, padj.y)
) %>%
select(-ends_with(".x"), -ends_with(".y"))
COMBINED_HOX_WNT_DE05 <- full_join(DE_05_Hox_Literature_collapsed,WNT_HOX_MAN,by=c("query")) %>%
mutate(
baseMean = coalesce(baseMean.x, baseMean.y),
log2FoldChange = coalesce(log2FoldChange.x, log2FoldChange.y),
lfcSE = coalesce(lfcSE.x, lfcSE.y),
pvalue = coalesce(pvalue.x, pvalue.y),
padj = coalesce(padj.x, padj.y)
) %>%
select(-ends_with(".x"), -ends_with(".y"))
select <- unique(COMBINED_HOX_WNT_DE05$query)
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(select,(gene_labels$query)),2], fontsize_row = 8, cutree_rows = 2)
top50_DE
save_ggplot(top50_DE, "Hox_All_DE")
# Perform clustering
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k for the number of clusters
# Create a dataframe for clustering and label management
clustered_data <- data.frame(
query = select,
Heatmap_Label = gene_labels[match(select, gene_labels$query), 2],
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Ensure there is only 1 gap between clusters
#row_breaks <- which(cluster_assignments != cluster_assignments[1]) # Only where cluster switches
# Generate the heatmap with reordered rows and labels
top50_DE <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
fontsize_row = 8,
borders_color = "white",
gaps_row = c(3,6,18,26,28,37)
)
# Save the heatmap
save_ggplot(top50_DE, "clusters_clean/Hox_All_DE")
COMBINED_HOX_WNT_all <- COMBINED_HOX_WNT_all %>% mutate(Heatmap_Label = coalesce(Heatmap_Label, def_short))
COMBINED_HOX_WNT_all$Heatmap_Label <- gsub("Protein Wnt", "Wnt", COMBINED_HOX_WNT_all$Heatmap_Label, ignore.case = TRUE)
select <- unique(COMBINED_HOX_WNT_all$query)
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = COMBINED_HOX_WNT_all$Heatmap_Label, fontsize_row = 8, cutree_rows = 2)
top50_DE
save_ggplot(top50_DE, "Hox_All")
# Perform clustering
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k for the number of clusters
# Create a dataframe for clustering and label management
clustered_data <- data.frame(
query = select,
Heatmap_Label = COMBINED_HOX_WNT_all$Heatmap_Label,
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Ensure there is only 1 gap between clusters
#row_breaks <- which(cluster_assignments != cluster_assignments[1]) # Only where cluster switches
# Generate the heatmap with reordered rows and labels
top50_DE <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
fontsize_row = 8,
borders_color = "white",
gaps_row = c(3,6,22,30,32,43)
)
# Save the heatmap
save_ggplot(top50_DE, "clusters_clean/Hox_All")
mucin <- Manual %>% filter(grepl("mucin", ProteinNames, ignore.case = TRUE)|grepl("toll-", ProteinNames, ignore.case = TRUE)|grepl("ZP", ProteinNames, ignore.case = TRUE)|grepl("lectin", ProteinNames, ignore.case = TRUE)|grepl("nitric", Heatmap_Label, ignore.case = TRUE)) %>% filter(Heatmap_Label !="Cnidocyte marker protein (Collectin-11)")
select <- unique(mucin$query)
select <- select[(select %in% rownames(vsd))]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(select,(gene_labels$query)),2], fontsize_row = 8, cutree_rows = 2)
top50_DE
save_ggplot(top50_DE, "bacteria_DE_SwissProt")
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k for the number of clusters
# Create a dataframe for clustering and label management
clustered_data <- data.frame(
query = select,
Heatmap_Label = gene_labels[match(select, gene_labels$query), 2],
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Generate the heatmap with reordered rows and labels
mucins <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
fontsize_row = 8,
borders_color = "white",
gaps_row = c(24,38)
)
# Save the heatmap
save_ggplot(mucins, "clusters_clean/bacteria_DE_SwissProt")
sensors <- Manual %>% filter(grepl("TRP", Heatmap_Label, ignore.case = TRUE)|grepl("cellular response to light stimulus", BiologicalProcess, ignore.case = TRUE)|grepl("detection of mechanical stimulus involved in sensory perception", BiologicalProcess, ignore.case = TRUE))
select <- sensors$query
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(select,(gene_labels$query)),2], fontsize_row = 8, cutree_rows = 3)
top50_DE
save_ggplot(top50_DE, "sensors_DE_SwissProt")
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 3) # Adjust k for the number of clusters
# Create a dataframe for clustering and label management
clustered_data <- data.frame(
query = select,
Heatmap_Label = gene_labels[match(select, gene_labels$query), 2],
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
mutate(cluster = factor(cluster, levels = c(1, 3, 2))) %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Generate the heatmap with reordered rows and labels
transporters_heat <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
fontsize_row = 8,
borders_color = "white", # Optional border around clusters
gaps_row = c(10,18) # Add breaks only between clusters
)
# Save the heatmap
save_ggplot(transporters_heat, "clusters_clean/sensors_DE_SwissProt")
select <- DE_05_HeatStressGenes %>% arrange(response_type) %>% select(query,gene_name,response_type) %>% unique()
rownames(select) <- select$query
z_scores <- t(scale(t(assay(vsd)[select$query, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_row = (select %>% select(response_type)), annotation_colors = ann_colors,
labels_row = select$gene_name, fontsize_row = 8, cutree_rows = 2)
top50_DE
save_ggplot(top50_DE, "HeatStress_DE_SwissProt")
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k for the number of clusters
# Create a dataframe for clustering and label management
clustered_data <- data.frame(
query = select$query,
Heatmap_Label = select$gene_name,
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Ensure there is only 1 gap between clusters
#row_breaks <- which(cluster_assignments != cluster_assignments[1]) # Only where cluster switches
# Generate the heatmap with reordered rows and labels
top50_DE <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
annotation_row = (select %>% select(response_type)),
fontsize_row = 8,
borders_color = "white", # Optional border around clusters
gaps_row = 4 # Add breaks only between clusters
)
# Save the heatmap
save_ggplot(top50_DE, "clusters_clean/HeatStress_DE_SwissProt")
select <- DESeq_HeatStressGenes %>% arrange(response_type) %>% select(query,gene_name,response_type) %>%
group_by(query) %>% #collapse duplicate rows
summarise(
gene_name = paste(unique(gene_name), collapse = "; "),
response_type = paste(unique(response_type), collapse = "; "),
.groups = "drop"
) %>% arrange(response_type,gene_name)
select <- data.frame(select)
rownames(select) <- select$query
z_scores <- t(scale(t(assay(vsd)[select$query, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_row = (select %>% select(response_type)), annotation_colors = ann_colors,
labels_row = select$gene_name, fontsize_row = 8, cutree_rows = 2)
top50_DE
save_ggplot(top50_DE, "HeatStress_all_SwissProt")
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k for the number of clusters
# Create a dataframe for clustering and label management
clustered_data <- data.frame(
query = select$query,
Heatmap_Label = select$gene_name,
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Ensure there is only 1 gap between clusters
#row_breaks <- which(cluster_assignments != cluster_assignments[1]) # Only where cluster switches
# Generate the heatmap with reordered rows and labels
top50_DE <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
annotation_row = (select %>% select(response_type)),
fontsize_row = 8,
borders_color = "white", # Optional border around clusters
gaps_row = 4 # Add breaks only between clusters
)
# Save the heatmap
save_ggplot(top50_DE, "clusters_clean/HeatStress_all_SwissProt")
# Function to generate short names for proteins
generate_short_name <- function(data) {
data %>%
mutate(short_name = ifelse(nchar(ProteinNames) > 60,
paste0(substr(ProteinNames, 1, 57), "..."),
ProteinNames))
}
# Function to create gene labels
create_gene_labels <- function(data) {
data %>%
select(query, short_name) %>%
mutate_all(~ ifelse(is.na(.), "", .)) # Replace NAs with ""
}
# Function to generate z-scores for selected genes
calculate_z_scores <- function(data, selection, vsd_matrix) {
selected_genes <- match(selection, rownames(vsd_matrix))
selected_genes <- selected_genes[!is.na(selected_genes)] # Remove NAs
t(scale(t(assay(vsd_matrix)[selected_genes, ]), center = TRUE, scale = TRUE))
}
# Function to create and save heatmap
create_heatmap <- function(z_scores, labels_row, annotation_col, annotation_colors, filename) {
heatmap <- pheatmap(z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = TRUE,
show_rownames = TRUE,
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = annotation_col,
annotation_colors = annotation_colors,
labels_row = labels_row,
fontsize_row = 6)
save_ggplot(heatmap, filename)
}
# Process DE_05_SwissProt
DE_05_SwissProt <- generate_short_name(DE_05_SwissProt)
gene_labels <- create_gene_labels(DE_05_SwissProt)
TRP <- DE_05_SwissProt %>% filter(grepl("transient", ProteinNames, ignore.case = TRUE))
select <- TRP$query
z_scores <- calculate_z_scores(DE_05_SwissProt, select, vsd)
create_heatmap(z_scores,
labels_row = gene_labels[match(select, gene_labels$query), 2],
annotation_col = heatmap_metadata %>% select(Tissue),
annotation_colors = ann_colors,
filename = "TRP_DE_SwissProt")
# Process annot_tab
annot_tab <- generate_short_name(annot_tab)
gene_labels <- create_gene_labels(annot_tab)
TRP <- annot_tab %>% filter(grepl("transient", ProteinNames, ignore.case = TRUE))
TRP <- left_join(TRP, as.data.frame(resOrdered)) %>% filter(!is.na(log2FoldChange))
select1 <- TRP$query
z_scores <- calculate_z_scores(annot_tab, select1, vsd)
create_heatmap(z_scores,
labels_row = TRP$short_name,
annotation_col = heatmap_metadata %>% select(Tissue),
annotation_colors = ann_colors,
filename = "TRP_SwissProt")
significant <- ifelse(row.names(z_scores) %in% DE_05$query, "Significant", "Not Significant")
# Add this as a new annotation
row_annotation <- data.frame(Significance = significant)
rownames(row_annotation) <- rownames(z_scores)
row_annotation_colors <- list(Significance = c("Significant" = "red", "Not Significant" = "grey"))
heatmap <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = TRUE,
show_rownames = TRUE,
cluster_cols = TRUE,
cutree_cols = 2,
cutree_rows = 5,
annotation_col = heatmap_metadata %>% select(Tissue),
annotation_colors = ann_colors,
annotation_row = row_annotation,
annotation_row_colors = row_annotation_colors,
labels_row = TRP$short_name,
fontsize_row = 12)
save_ggplot(heatmap, "TRP_SwissProt")
# Process DE_05_SwissProt
DE_05_SwissProt <- generate_short_name(DE_05_SwissProt)
gene_labels <- create_gene_labels(DE_05_SwissProt)
GFP <- DE_05_SwissProt %>% filter(grepl("biolum", BiologicalProcess, ignore.case = TRUE))
select <- GFP$query
z_scores <- calculate_z_scores(DE_05_SwissProt, select, vsd)
create_heatmap(z_scores,
labels_row = gene_labels[match(select, gene_labels$query), 2],
annotation_col = heatmap_metadata %>% select(Tissue),
annotation_colors = ann_colors,
filename = "GFP_DE_SwissProt")
# Process annot_tab
annot_tab <- generate_short_name(annot_tab)
gene_labels <- create_gene_labels(annot_tab)
GFP <- annot_tab %>% filter(grepl("biolum", BiologicalProcess, ignore.case = TRUE))
select1 <- GFP$query
z_scores <- calculate_z_scores(annot_tab, select1, vsd)
create_heatmap(z_scores,
labels_row = gene_labels[match(select1, gene_labels$query), 2],
annotation_col = heatmap_metadata %>% select(Tissue),
annotation_colors = ann_colors,
filename = "GFP_SwissProt")
Annot_Pdam <- read.csv("../references/annotation/blastp_Pdam_out.tab", sep = '\t', header = FALSE) %>% select(c(1,2)) %>% dplyr::rename("protein_id" = "V2", "query" = "V1")
Manual_Pdam <- left_join(Manual, Annot_Pdam)
library(readxl)
larval_aboral_enriched <- read_excel("../output_RNA/marker_genes/RamonMateu_Pdam_larval.xlsx", sheet = "pocillopora_aboral_enriched_05_")
Manual_Pdam_upaboral <- Manual_Pdam %>% filter(log2FoldChange > 0)
inner_join(larval_aboral_enriched, Manual_Pdam_upaboral, by=c("gene ID"="protein_id")) %>% dim()
## [1] 18 26
#18 genes overlap from the 126 in the paper
larval_oral_enriched <- read_excel("../output_RNA/marker_genes/RamonMateu_Pdam_larval.xlsx", sheet = "pocillopora_oral_enriched_05_lf")
Manual_Pdam_uporal <- Manual_Pdam %>% filter(log2FoldChange < 0)
inner_join(larval_oral_enriched, Manual_Pdam_uporal, by=c("gene ID"="protein_id")) %>% dim()
## [1] 6 26
inner_join(larval_oral_enriched, Manual_Pdam_uporal, by=c("gene ID"="protein_id")) %>% print()
## # A tibble: 6 × 26
## `gene ID` baseMean.x log2FoldChange.x lfcSE.x stat pvalue.x padj.x
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 XP_027046532.1 88.9 4.29 0.652 5.04 4.54e- 7 8.03e- 5
## 2 XP_027036583.1 563. 3.04 0.287 7.12 1.06e-12 4.11e-10
## 3 XP_027054215.1 918. 2.57 0.275 5.71 1.14e- 8 2.77e- 6
## 4 XP_027054196.1 855. 2.49 0.263 5.65 1.59e- 8 3.76e- 6
## 5 XP_027058731.1 1372. 2.44 0.270 5.35 8.95e- 8 1.85e- 5
## 6 XP_027040620.1 1782. 1.83 0.241 3.45 5.66e- 4 4.98e- 2
## # ℹ 19 more variables: `pfam domain` <chr>, `signal peptide` <chr>,
## # `astroides ortholog` <chr>, `astroides oral` <chr>,
## # `clytia ortholog` <chr>, `clytia oral` <chr>, X <int>, query <chr>,
## # baseMean.y <dbl>, log2FoldChange.y <dbl>, lfcSE.y <dbl>, pvalue.y <dbl>,
## # padj.y <dbl>, Heatmap_Label <chr>, blast_hit <chr>, evalue <dbl>,
## # ProteinNames <chr>, BiologicalProcess <chr>, GeneOntologyIDs <chr>
#only 6/83 genes overlap
larval_aboral_clusters <- read_excel("../output_RNA/marker_genes/RamonMateu_Pdam_larval_scRNA.xlsx") %>% filter(!is.na(pocillopora))
larval_aboral_clusters_Pacuta <- inner_join(larval_aboral_clusters, Manual_Pdam, by=c("pocillopora"="protein_id"))
#my genes upregulated in oral tissue are high in mucous cells, cool
After you’ve confirmed your code works as expected, use renv::snapshot() to record the packages and their sources in the lockfile.
renv::snapshot()